Zack Saadioui
8/24/2024
1
2
bash
# Install the langchain package with pip.
pip install langchain
1
2
bash
# Install the beautifulsoup4 and unstructured packages with pip.
pip install beautifulsoup4 unstructured
1
UnstructuredHTMLLoader
1 2 3 4 5
# Example: load a local HTML file with UnstructuredHTMLLoader and print
# whatever the loader returns.
from langchain_community.document_loaders import UnstructuredHTMLLoader

# Path of the HTML file to load (sample path; replace with your own file).
loader = UnstructuredHTMLLoader("example_data/fake-content.html")
data = loader.load()
print(data)
1
BSHTMLLoader
1 2 3 4 5
# Example: load a local HTML file with BSHTMLLoader and print whatever
# the loader returns.
from langchain_community.document_loaders import BSHTMLLoader

# Path of the HTML file to load (sample path; replace with your own file).
loader = BSHTMLLoader("example_data/fake-content.html")
data = loader.load()
print(data)
1
BSHTMLLoader
1 2 3 4 5
# Example: fetch content from a URL with SpiderLoader in "crawl" mode and
# print whatever the loader returns.
from langchain_community.document_loaders import SpiderLoader

# "YOUR_API_KEY" is a placeholder; substitute a real key before running.
loader = SpiderLoader(
    api_key="YOUR_API_KEY",
    url="https://spider.cloud",
    mode="crawl",
)
data = loader.load()
print(data)
1
SpiderLoader
1
FireCrawl
1 2 3 4
# Example: fetch content from a URL with FireCrawlLoader in "crawl" mode.
# The result is kept in `data`; this snippet does not print it.
from langchain_community.document_loaders import FireCrawlLoader

# "YOUR_API_KEY" is a placeholder; substitute a real key before running.
loader = FireCrawlLoader(
    api_key="YOUR_API_KEY",
    url="https://firecrawl.dev",
    mode="crawl",
)
data = loader.load()
1
AzureAIDocumentIntelligenceLoader
1 2 3 4
# Example: load a file through AzureAIDocumentIntelligenceLoader using the
# "prebuilt-layout" model. The result is kept in `documents`.
from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader

# "<endpoint>", "<key>" and "<filepath>" are placeholders; substitute real
# values before running.
loader = AzureAIDocumentIntelligenceLoader(
    api_endpoint="<endpoint>",
    api_key="<key>",
    file_path="<filepath>",
    api_model="prebuilt-layout",
)
documents = loader.load()
Copyright © Arsturn 2025