# Sample record: the dict form of one Document (see its 'type' field).
# 'metadata' holds the PDF properties plus 'source', 'page' and 'total_pages';
# 'page_content' holds the text of that page. This one is page 0 of
# data/ticket_to_ride.pdf.
{'id': None, 'metadata': {'producer': 'Acrobat Distiller 9.5.5 (Macintosh)', 'creator': 'QuarkXPress 9.5.3.1', 'creationdate': '2015-03-09T10:16:51-07:00', 'author': 'cyrille', 'gts_pdfxversion': 'PDF/X-3:2002', 'moddate': '2015-05-18T11:25:26-07:00', 'title': '[T2R] rules EN reprint 2015_TTR2 rules US', 'trapped': '/False', 'source': 'data/ticket_to_ride.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, 'page_content': 'O\nn a blustery autumn evening five old friends met in the backroom of one of the city’s oldest and most private clubs. Each had\ntraveled a long distance — from all corners of the world — to meet on this very specific day… October 2, 1900 — 28 years to the\nday that the London eccentric, Phileas Fogg accepted and then won a £20,000 bet that he could travel Around the World in 80 Days . \nWhen the story of Fogg’s triumphant journey filled all the newspapers of the day, the five attended University together. Inspired by\nhis impetuous gamble, and a few pints from the local pub, the group commemorated his circumnavigation with a more modest excur-\nsion and wager – a bottle of good claret to the first to make it to Le Procope in Paris.', 'type': 'Document'}
# Give every chunk an ID of the form "Page Source:Page Number:Chunk Index".
# Example: "data/monopoly.pdf:6:2" is the chunk with index 2 (i.e. the third
# chunk) on page 6 of data/monopoly.pdf.
last_page_id = None
current_chunk_index = 0

for chunk in chunks:
    source = chunk.metadata.get("source")
    page = chunk.metadata.get("page")
    current_page_id = f"{source}:{page}"

    # If the page ID is the same as the last one, increment the index;
    # otherwise this is the first chunk of a new page, so restart at 0.
    if current_page_id == last_page_id:
        current_chunk_index += 1
    else:
        current_chunk_index = 0

    # Build the full ID and remember which page we just handled. Without
    # this update the comparison above never matches and every chunk would
    # end up with index 0.
    chunk_id = f"{current_page_id}:{current_chunk_index}"
    last_page_id = current_page_id

    # Store the ID under the "id" metadata key so it stays with the chunk.
    chunk.metadata["id"] = chunk_id
# Two small example documents; each records where it came from under the
# "source" metadata key.
document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
    id=1,
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
    id=2,
)

documents = [document_1, document_2]

# One random UUID string per document. Iterating the list directly replaces
# the original "for _ inrange(len(documents))", which was a syntax error.
uuids = [str(uuid4()) for _ in documents]
# Prompt skeleton with two placeholders: {context} receives the retrieved
# text and {question} receives the user's query.
# NOTE(review): the template is a single line with plain spaces around the
# "---" divider; if line breaks were intended there, confirm before changing.
PROMPT_TEMPLATE = (
    " Answer the question based only on the following context: {context} "
    "--- "
    "Answer the question based on the above context: {question} "
)
## 将文本连起来,中间用\n\n---\n\n分隔 context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results]) prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE) prompt = prompt_template.format(context=context_text, question=query_text) # print(prompt)
model = OllamaLLM(model="mistral") response_text = model.invoke(prompt)
sources = [doc.metadata.get("id", None) for doc, _score in results] formatted_response = f"Response: {response_text}\nSources: {sources}" print(formatted_response) return response_text