Guide: Implementing Citations with Perplexity and GCP Gemini Model
This guide demonstrates how to implement and use citations with both Perplexity and GCP Gemini through GIP.
We'll explore key features, implementation patterns, and use cases for each system.
Overview of Citation Systems
Perplexity Citations
Simple URL-based citation system that returns source links for generated content.
GCP Gemini Grounding
Comprehensive system providing detailed metadata about sources and their relevance.
Implementation Examples
1. Perplexity Citation Implementation
Basic setup and usage:
from openai import OpenAI
# OpenAI-compatible client pointed at the A15T gateway endpoint;
# replace "your_api_key" with a real platform key before running.
client = OpenAI(
api_key="your_api_key",
base_url="https://api.platform.a15t.com/v1"
)
def get_perplexity_response(query: str):
    """Send *query* to a Perplexity online model and return (answer, citations).

    Citations are returned by the gateway as a list of source URLs under
    the response's ``model_extensions`` field.
    """
    completion = client.chat.completions.create(
        model="perplexity/llama-3.1-sonar-small-128k-online",
        messages=[{"role": "user", "content": query}],
        extra_body={
            "model_extensions": {
                "provider": "perplexity",
                "return_citations": True,
                "num_search_results": 5,
            }
        },
    )
    answer = completion.choices[0].message.content
    source_urls = completion.model_extensions.get("citations", [])
    return answer, source_urls
Example response structure:
{
"content": "Answer about quantum computing...",
"citations": [
"https://source1.com/article",
"https://source2.com/research"
]
}
2. GCP Gemini Grounding Implementation
Basic setup and usage (Gemini < 2.0):
def get_vertex_response(query: str):
    """Query Gemini 1.5 with Google Search grounding enabled.

    Returns a tuple of (answer text, grounding metadata dict); the metadata
    is empty when the model answered without grounding.
    """
    completion = client.chat.completions.create(
        model="gcp/gemini-1.5-pro-002",
        messages=[{"role": "user", "content": query}],
        extra_body={
            "model_extensions": {
                "provider": "gcp",
                "google_search_retrieval": {
                    # Ground only when retrieval confidence clears 0.7.
                    "dynamic_retrieval_config": {"dynamic_threshold": 0.7}
                },
            }
        },
    )
    answer = completion.choices[0].message.content
    metadata = completion.model_extensions.get("grounding_metadata", {})
    return answer, metadata
Example response structure (Gemini < 2.0):
{
"content": "Answer about quantum computing...",
"grounding_metadata": {
"grounding_chunks": [
{
"web": {
"title": "Research Article",
"uri": "https://source.com"
}
}
],
"grounding_supports": [
{
"grounding_chunk_indices": [0],
"confidence_scores": [0.95],
"segment": {
"text": "Referenced text portion"
}
}
]
}
}
Basic setup and usage (Gemini >= 2.0):
def get_vertex_response(query: str):
    """Query Gemini 2.0 with Google Search grounding enabled.

    Gemini >= 2.0 uses the parameterless ``google_search`` tool instead of
    ``google_search_retrieval``. Returns (answer text, grounding metadata).
    """
    completion = client.chat.completions.create(
        model="gcp/gemini-2.0-flash-001",
        messages=[{"role": "user", "content": query}],
        extra_body={
            "model_extensions": {
                "provider": "gcp",
                "google_search": {},
            }
        },
    )
    answer = completion.choices[0].message.content
    metadata = completion.model_extensions.get("grounding_metadata", {})
    return answer, metadata
Example response structure (Gemini >= 2.0):
{
"model_extensions": {
"grounding_metadata": {
"web_search_queries": [
"who won world series 2020"
],
"search_entry_point": {
"rendered_content": "<style>\n.c...n",
"sdk_blob": ""
},
"retrieval_queries": [],
"grounding_chunks": [
{
"web": {
"uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/...",
"title": "latimes.com"
}
},
...
],
"grounding_supports": [
{
"segment": {
"part_index": 0,
"start_index": 0,
"end_index": 88,
"text": "In 2020, the **Los Angeles Dodgers** won the World Series, defeating the Tampa Bay Rays."
},
"grounding_chunk_indices": [
0,
1,
2,
3,
4,
5
],
"confidence_scores": [
0.95567983,
0.93321663,
0.9251957,
0.8089572,
0.95722926,
0.8422677
]
},
...
]
}
}
}
Key Features Comparison
Perplexity Citations
- Configuration Options:
  - `return_citations`: Boolean to enable citations
  - `num_search_results`: Control the number of sources returned
  - `search_recency_filter`: Filter sources by time period
- Response Processing:
def process_perplexity_citations(response):
    """Format the response's citation URLs as numbered reference strings.

    Returns a list like ``["[1] https://...", "[2] https://..."]``; empty
    when the response carries no citations.
    """
    urls = response.model_extensions.get("citations", [])
    formatted = []
    for number, url in enumerate(urls, start=1):
        formatted.append(f"[{number}] {url}")
    return formatted
GCP Gemini Grounding
- Configuration Options:
  - `dynamic_threshold`: Control the source relevance threshold (used with `google_search_retrieval` on Gemini < 2.0)
- Response Processing:
def process_vertex_grounding(response):
    """Extract web sources from Gemini grounding metadata.

    Accepts either a chat-completion response object exposing
    ``model_extensions``, a ``model_extensions`` dict, or the
    ``grounding_metadata`` dict itself (backward-compatible generalization).

    Returns a list of ``{"title", "url", "confidence"}`` dicts, one per
    ``grounding_chunks`` entry that has a ``"web"`` source. ``confidence``
    is ``None`` when no grounding support references the chunk.
    """
    if hasattr(response, "model_extensions"):
        grounding = response.model_extensions["grounding_metadata"]
    elif "grounding_metadata" in response:
        grounding = response["grounding_metadata"]
    else:
        grounding = response

    supports = grounding.get("grounding_supports", [])

    def _confidence_for(chunk_index):
        # ``grounding_chunk_indices`` refer to positions within
        # ``grounding_chunks``. The original code compared them against
        # len(sources), which drifts whenever a non-"web" chunk appears,
        # attributing confidences to the wrong source. Match on the chunk's
        # actual index instead.
        for support in supports:
            for pos, idx in enumerate(support.get("grounding_chunk_indices", [])):
                if idx == chunk_index:
                    scores = support.get("confidence_scores", [])
                    # Guard against supports with fewer scores than indices.
                    if pos < len(scores):
                        return scores[pos]
        return None

    sources = []
    for chunk_index, chunk in enumerate(grounding.get("grounding_chunks", [])):
        if "web" in chunk:
            sources.append({
                "title": chunk["web"].get("title"),
                "url": chunk["web"].get("uri"),
                "confidence": _confidence_for(chunk_index),
            })
    return sources
Use Case Recommendations
Choose Perplexity When:
- Simple citation tracking is needed
- Quick implementation is priority
- Basic source linking is sufficient
- URL-based citations are acceptable
Example use case:
# Ask a question, then print the answer followed by a numbered source list.
content, citations = get_perplexity_response("Latest quantum computing developments?")
print(f"Answer: {content}\n\nSources:")
for number, url in enumerate(citations, 1):
    print(f"[{number}] {url}")
Choose GCP Gemini When:
- Detailed source verification is required
- Confidence scoring is important
- Text-to-source mapping is needed
- Advanced grounding features are necessary
Example use case:
# Ask a grounded question, then print the answer with per-source confidence.
# NOTE: get_vertex_response returns the grounding *metadata dict*, so read
# the chunks/supports from it directly (passing it to a helper that expects
# a full response object would fail), and skip the confidence formatting
# when no support references a chunk (confidence is None).
content, grounding = get_vertex_response("Latest quantum computing developments?")
print(f"Answer: {content}\n\nSources with Confidence:")
for position, chunk in enumerate(grounding.get("grounding_chunks", [])):
    if "web" not in chunk:
        continue
    # grounding_chunk_indices point into grounding_chunks, so match on the
    # chunk's own position.
    confidence = next(
        (support["confidence_scores"][i]
         for support in grounding.get("grounding_supports", [])
         for i, idx in enumerate(support["grounding_chunk_indices"])
         if idx == position),
        None,
    )
    title = chunk["web"].get("title")
    if confidence is not None:
        print(f"- {title}: {confidence:.2f}")
    else:
        print(f"- {title}: n/a")
Formatting Example
Perplexity Citation
from openai import OpenAI
def get_response_with_citations(query: str, api_key: str):
    """Ask Perplexity a question and return the answer text decorated with
    inline ``[n]`` citation markers and a trailing numbered source list."""
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.platform.a15t.com/v1",
    )
    # Get response with citations
    response = client.chat.completions.create(
        model="perplexity/llama-3.1-sonar-small-128k-online",
        messages=[{"role": "user", "content": query}],
        extra_body={
            "model_extensions": {
                "provider": "perplexity",
                "return_citations": True,
                "num_search_results": 3,  # Limit to 3 sources
            }
        },
    )
    content = response.choices[0].message.content
    citations = response.model_extensions.get("citations", [])
    # Drop one inline "[n]" marker after each successive sentence end.
    for marker, _ in enumerate(citations, 1):
        content = content.replace(". ", f".[{marker}] ", 1)
    # Append the numbered source list at the end.
    source_lines = "\n".join(
        f"[{number}] {url}" for number, url in enumerate(citations, 1)
    )
    content += "\n\nSources:\n" + source_lines
    return content
# Usage example: full Perplexity round trip (requires a real API key and
# network access; "your-api-key" is a placeholder).
result = get_response_with_citations(
"Who is the richest person in the world?",
"your-api-key"
)
print(result)
Result
Bernard Arnault is currently the richest person in the world.[1] His net worth is estimated at $211 billion.[2] He is the CEO of LVMH, the world's largest luxury goods company.[3]
Sources:
[1] https://www.forbes.com/billionaires/
[2] https://www.bloomberg.com/billionaires/
[3] https://www.lvmh.com/
GCP AI Citation Example
def get_grounded_response(query: str, api_key: str):
    """Ask Gemini a grounded question and return the answer text decorated
    with inline ``[n]`` citation markers and a trailing numbered URL list."""
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.platform.a15t.com/v1",
    )
    # Get response with grounding
    response = client.chat.completions.create(
        model="gcp/gemini-1.5-pro-002",
        messages=[{"role": "user", "content": query}],
        extra_body={
            "model_extensions": {
                "provider": "gcp",
                "google_search_retrieval": {},
            }
        },
    )
    content = response.choices[0].message.content
    grounding = response.model_extensions.get("grounding_metadata", {})
    # Collect the URI of every web-backed grounding chunk.
    sources = []
    for chunk in grounding.get("grounding_chunks", []):
        if "web" in chunk:
            sources.append(chunk["web"]["uri"])
    # Drop one inline "[n]" marker after each successive sentence end.
    for marker, _ in enumerate(sources, 1):
        content = content.replace(". ", f".[{marker}] ", 1)
    # Append the numbered source list at the end.
    content += "\n\nSources:\n"
    content += "\n".join(f"[{number}] {url}" for number, url in enumerate(sources, 1))
    return content
# Usage example: full Gemini grounding round trip (requires a real API key
# and network access; "your-api-key" is a placeholder).
result = get_grounded_response(
"What are the latest developments in AI?",
"your-api-key"
)
print(result)