from langchain_community.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

file = "DeepSeek_R1.pdf"

# Load the PDF file.
loader = PDFPlumberLoader(file)
docs = loader.load()

# Split the loaded documents into chunks (chunk_size=500, no overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(docs)
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

model = ChatOllama(model="deepseek-r1:1.5b")

prompt = ChatPromptTemplate.from_template(
    "Summarize the main themes in these retrieved docs: {docs}"
)


def format_docs(docs) -> str:
    """Convert the passed-in documents into a single string.

    Each document's ``page_content`` is joined to the next with a blank
    line in between.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The dict step feeds the chain input through format_docs and binds the
# result to the prompt's {docs} placeholder.
chain = {"docs": format_docs} | prompt | model | StrOutputParser()

question = "What is the purpose of the DeepSeek project?"

# NOTE(review): `vectorstore` is not defined in the visible part of this
# file; it must be created (presumably from the document splits) before this
# line runs -- confirm where it comes from.
docs = vectorstore.similarity_search(question)

chain.invoke(docs)
from langchain_core.runnables import RunnablePassthrough

# Prompt for question answering over retrieved context. The text is kept
# exactly as in the original (no line breaks inside the template); it is only
# split across adjacent literals here to keep source lines short.
RAG_TEMPLATE = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "<context>{context}</context>"
    "Answer the following question:"
    "{question}"
)

rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# NOTE(review): `vectorstore` is not defined in the visible part of this
# file -- confirm it is created before this point.
retriever = vectorstore.as_retriever()

# The input question is sent to the retriever (whose output is formatted by
# format_docs into {context}) and is also passed through unchanged as
# {question}.
qa_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | model
    | StrOutputParser()
)

question = "What is the purpose of the DeepSeek project?"

# Run
qa_chain.invoke(question)