polen
Version:
A framework for delightful GraphQL developer portals
1,076 lines (945 loc) • 35.7 kB
text/typescript
/**
* Tree-sitter GraphQL parsing with semantic analysis
*
* This module combines tree-sitter syntax parsing with GraphQL semantic
* analysis to create unified tokens for interactive code blocks.
*/
import type { CodeAnnotation } from 'codehike/code'
import {
getNamedType,
type GraphQLArgument,
GraphQLEnumType,
type GraphQLField,
GraphQLInputObjectType,
GraphQLInterfaceType,
GraphQLObjectType,
type GraphQLOutputType,
GraphQLScalarType,
type GraphQLSchema,
GraphQLUnionType,
isInterfaceType,
isObjectType,
} from 'graphql'
import graphqlWasmUrl from 'tree-sitter-graphql-grammar-wasm/grammar.wasm?url'
import * as WebTreeSitter from 'web-tree-sitter'
import treeSitterWasmUrl from 'web-tree-sitter/tree-sitter.wasm?url'
import {
isKeywordNodeType,
isLiteralNodeType,
isPunctuationNodeType,
type TreeSitterGraphQLNodeType,
} from './graphql-node-types.js'
import type { SemanticNode } from './semantic-nodes.js'
import {
isArgument,
isFragment,
isInputField,
isInvalidField,
isOperation,
isOutputField,
isVariable,
} from './semantic-nodes.js'
/**
* Unified token structure that combines tree-sitter and GraphQL semantics
*
* This interface represents a single parsed token from a GraphQL document,
* enriched with semantic information from the GraphQL schema when available.
* It provides a consistent API for accessing syntax highlighting, interactivity,
* and CodeHike annotation data.
*
* @example
* ```typescript
* const tokens = await parseGraphQLWithTreeSitter(code, [], schema)
* const fieldToken = tokens.find(t => t.text === 'name')
*
* if (fieldToken?.polen.isInteractive()) {
* const url = fieldToken.polen.getReferenceUrl()
* console.log(`Navigate to: ${url}`)
* }
* ```
*/
export interface GraphQLToken {
/** Reference to the tree-sitter node that this token represents */
treeSitterNode: WebTreeSitter.Node
/**
* Optional semantic information from GraphQL schema analysis
* This includes type information, field definitions, and validation results
*/
semantic?: SemanticNode
/** Text content of the token (computed from tree-sitter node) */
get text(): string
/** Start character position in the source code (computed from tree-sitter node) */
get start(): number
/** End character position in the source code (computed from tree-sitter node) */
get end(): number
/** Polen specific functionality for interactive GraphQL documentation */
polen: {
/** Check if this token should be interactive (clickable/hoverable) */
isInteractive: () => boolean
/** Get the reference URL for navigation, or null if not applicable */
getReferenceUrl: () => string | null
}
/** Syntax highlighting functionality */
highlighter: {
/** Get the CSS class name for styling this token */
getCssClass: () => string
}
/** CodeHike integration for enhanced code block features */
codeHike: {
/** Array of CodeHike annotations that apply to this token */
annotations: CodeAnnotation[]
}
}
/**
* Implementation of unified token
*/
class UnifiedToken implements GraphQLToken {
public polen: { isInteractive: () => boolean; getReferenceUrl: () => string | null }
public highlighter: { getCssClass: () => string }
public codeHike: { annotations: CodeAnnotation[] }
// Cache these values to avoid WASM access issues
private _text: string
private _start: number
private _end: number
private _nodeType: TreeSitterGraphQLNodeType
constructor(
public treeSitterNode: WebTreeSitter.Node,
public semantic: SemanticNode | undefined,
annotations: CodeAnnotation[],
) {
// Cache the values immediately to avoid WASM access issues later
// This works for both real WebTreeSitter nodes and synthetic nodes
this._text = treeSitterNode.text
this._start = treeSitterNode.startIndex
this._end = treeSitterNode.endIndex
this._nodeType = treeSitterNode.type as TreeSitterGraphQLNodeType
this.codeHike = { annotations }
// Polen namespace
this.polen = {
isInteractive: () => this._isInteractive(),
getReferenceUrl: () => this._getReferenceUrl(),
}
// Highlighter namespace
this.highlighter = {
getCssClass: () => this._getCssClass(),
}
}
get text(): string {
return this._text
}
get start(): number {
return this._start
}
get end(): number {
return this._end
}
private _getCssClass(): string {
const nodeType = this._nodeType
// Development-only validation
if (process.env['NODE_ENV'] === 'development') {
// Validate that the node type is actually a valid TreeSitterGraphQLNodeType
const validTypes = new Set([
// Add a few common types for validation
'document',
'name',
'field',
'argument',
'variable',
'comment',
'error_hint',
'whitespace',
'string_value',
'int_value',
'float_value',
'query',
'mutation',
'subscription',
])
if (!validTypes.has(nodeType as any) && !nodeType.match(/^[a-z_]+$/)) {
console.warn(`Unknown tree-sitter node type: "${nodeType}". Consider adding to TreeSitterGraphQLNodeType.`)
}
}
// Error hints
if (nodeType === 'error_hint') {
return 'graphql-error-hint'
}
// Comments
if (nodeType === 'comment' || nodeType === 'description') {
return 'graphql-comment'
}
// Keywords
if (isKeywordNodeType(nodeType)) {
return 'graphql-keyword'
}
// Literals
if (nodeType === 'string_value') return 'graphql-string'
if (nodeType === 'int_value' || nodeType === 'float_value') return 'graphql-number'
// Punctuation
if (isPunctuationNodeType(nodeType)) {
return 'graphql-punctuation'
}
// Names - use semantic info for better classification
if (nodeType === 'name') {
// Check if this is an invalid field (has invalidField semantic)
if (this.semantic && 'kind' in this.semantic && this.semantic.kind === 'InvalidField') {
return 'graphql-field-error'
}
if (isOutputField(this.semantic) || isInputField(this.semantic)) {
return 'graphql-field-interactive'
}
if (
this.semantic instanceof GraphQLObjectType
|| this.semantic instanceof GraphQLScalarType
|| this.semantic instanceof GraphQLInterfaceType
) {
return 'graphql-type-interactive'
}
if (isVariable(this.semantic)) {
return 'graphql-variable'
}
if (isOperation(this.semantic)) {
return 'graphql-operation'
}
if (isFragment(this.semantic)) {
return 'graphql-fragment'
}
if (isArgument(this.semantic)) {
return 'graphql-argument'
}
}
// Variables
if (nodeType === 'variable') return 'graphql-variable'
return 'graphql-text'
}
private _isInteractive(): boolean {
if (!this.semantic) return false
// Fields, type references, arguments, and invalid fields are interactive
return isOutputField(this.semantic)
|| isInputField(this.semantic)
|| isArgument(this.semantic)
|| isInvalidField(this.semantic) // Invalid fields should show error popovers
|| this.semantic instanceof GraphQLObjectType
|| this.semantic instanceof GraphQLScalarType
|| this.semantic instanceof GraphQLInterfaceType
|| this.semantic instanceof GraphQLUnionType
|| this.semantic instanceof GraphQLEnumType
|| this.semantic instanceof GraphQLInputObjectType
}
private _getReferenceUrl(): string | null {
if (!this.semantic) return null
// Arguments - use #<field>__<argument> pattern
if (isArgument(this.semantic)) {
return `/reference/${this.semantic.parentType.name}#${this.semantic.parentField.name}__${this.semantic.argumentDef.name}`
}
// Output fields - use hash links since field routes aren't connected yet
if (isOutputField(this.semantic)) {
return `/reference/${this.semantic.parentType.name}#${this.semantic.fieldDef.name}`
}
// Input fields - use hash links since field routes aren't connected yet
if (isInputField(this.semantic)) {
return `/reference/${this.semantic.parentType.name}#${this.semantic.fieldDef.name}`
}
// Type references - use :type pattern
if (this.semantic instanceof GraphQLObjectType) {
return `/reference/${this.semantic.name}`
}
if (this.semantic instanceof GraphQLScalarType) {
return `/reference/${this.semantic.name}`
}
if (this.semantic instanceof GraphQLInterfaceType) {
return `/reference/${this.semantic.name}`
}
if (this.semantic instanceof GraphQLUnionType) {
return `/reference/${this.semantic.name}`
}
if (this.semantic instanceof GraphQLEnumType) {
return `/reference/${this.semantic.name}`
}
if (this.semantic instanceof GraphQLInputObjectType) {
return `/reference/${this.semantic.name}`
}
return null
}
}
// Cache for the parser instance
let parserPromise: Promise<WebTreeSitter.Parser> | null = null
/**
* Minimal synthetic node that implements just enough of the WebTreeSitter.Node interface
* Uses TreeSitterGraphQLNodeType for type safety
*
* IMPORTANT: This must be a class with getters to match WebTreeSitter.Node's WASM interface.
* Plain objects with properties will cause "memory access out of bounds" errors when
* tree-sitter tries to call the WASM getter functions.
*/
class SyntheticNode {
constructor(
public type: TreeSitterGraphQLNodeType,
private _text: string,
private _startIndex: number,
private _endIndex: number,
) {}
// These getters match WebTreeSitter.Node's interface
get text(): string {
return this._text
}
get startIndex(): number {
return this._startIndex
}
get endIndex(): number {
return this._endIndex
}
get childCount(): number {
return 0
}
get parent(): null {
return null
}
}
/**
* Tracks semantic context while walking the tree-sitter AST
*
* This class maintains the current GraphQL execution context as we traverse
* the syntax tree, allowing us to resolve field references to their schema
* definitions and validate field access.
*
* ## Context Management Strategy
*
* The semantic context uses a stack-based approach to track the current type context
* as we traverse nested GraphQL selections. This is essential for resolving field
* references since field names are only meaningful within their parent type context.
*
* ### Type Stack Management:
* - Each stack entry contains: { type: GraphQLType, field?: GraphQLField }
* - The `field` property tracks the field that led us to this type level
* - Stack depth corresponds to GraphQL selection nesting depth
* - Root level: operation root type (Query/Mutation/Subscription)
* - Nested levels: field return types that support sub-selections
*
* ### Context Transitions:
* - `enterOperation()`: Sets root type based on operation type
* - `enterField()`: Pushes field's return type if it's selectable (Object/Interface)
* - `exitField()`: Pops from stack when leaving a field's selection set
* - `enterFragment()`: Switches context to fragment's target type
*
* ### Argument Resolution Challenge:
* Arguments appear in the AST before their parent field context is established,
* requiring the complex lookup logic documented in the argument parsing section.
*
* @example
* ```typescript
* const context = new SemanticContext(schema)
* context.enterOperation('query') // Stack: [Query]
* context.enterField('user') // Stack: [Query, User]
* const fieldInfo = context.getFieldInfo('name') // Gets User.name field
* context.exitField() // Stack: [Query]
* ```
*/
class SemanticContext {
/**
* Stack of type contexts representing the current selection path.
* Each entry tracks the type we're currently selecting from and optionally
* the field that brought us to this type level.
*/
private typeStack: Array<{ type: GraphQLObjectType | GraphQLInterfaceType; field?: GraphQLField<any, any> }> = []
/** Current operation type, set when entering an operation definition */
operationType: 'query' | 'mutation' | 'subscription' | null = null
/** GraphQL schema used for type lookups and validation */
schema: GraphQLSchema
constructor(schema: GraphQLSchema) {
this.schema = schema
}
enterOperation(type: string) {
this.operationType = type as 'query' | 'mutation' | 'subscription'
const rootType = type === 'query'
? this.schema.getQueryType()
: type === 'mutation'
? this.schema.getMutationType()
: type === 'subscription'
? this.schema.getSubscriptionType()
: null
if (rootType) {
this.typeStack = [{ type: rootType }]
}
}
enterFragment(typeName: string) {
const type = this.schema.getType(typeName)
if (type && (isObjectType(type) || isInterfaceType(type))) {
this.typeStack = [{ type }]
}
}
getFieldInfo(
fieldName: string,
): { parentType: GraphQLObjectType | GraphQLInterfaceType; fieldDef: GraphQLField<any, any> } | null {
const current = this.typeStack[this.typeStack.length - 1]
if (!current) return null
const fields = current.type.getFields()
const fieldDef = fields[fieldName]
if (fieldDef) {
return { parentType: current.type, fieldDef }
}
return null
}
enterField(fieldName: string) {
const fieldInfo = this.getFieldInfo(fieldName)
if (fieldInfo) {
// Only push to stack if field type is object/interface
const fieldType = getNamedType(fieldInfo.fieldDef.type)
if (isObjectType(fieldType) || isInterfaceType(fieldType)) {
// Push new context with the field that brought us here
this.typeStack.push({ type: fieldType, field: fieldInfo.fieldDef })
}
}
}
exitField() {
// Only pop if we're not at root and the last entry has a field
// (meaning it was pushed by enterField for an object/interface type)
if (this.typeStack.length > 1) {
const last = this.typeStack[this.typeStack.length - 1]
if (last && last.field) {
this.typeStack.pop()
}
}
}
getArgumentInfo(argName: string) {
const current = this.typeStack[this.typeStack.length - 1]
if (!current?.field) return null
const arg = current.field.args.find(a => a.name === argName)
return arg ? { field: current.field, arg, parentType: current.type } : null
}
getCurrentType(): (GraphQLObjectType | GraphQLInterfaceType) | null {
const current = this.typeStack[this.typeStack.length - 1]
return current?.type || null
}
lookupType(typeName: string) {
return this.schema.getType(typeName)
}
reset() {
this.typeStack = []
this.operationType = null
}
}
/**
* Parse GraphQL code into interactive tokens with semantic information
*
* @param code - The raw GraphQL code to parse
* @param annotations - CodeHike annotations that might affect rendering
* @param schema - Optional GraphQL schema for semantic analysis
* @returns Array of tokens representing the parsed code
*/
export async function parseGraphQLWithTreeSitter(
code: string,
annotations: CodeAnnotation[] = [],
schema?: GraphQLSchema,
): Promise<GraphQLToken[]> {
// Validate input
if (!code || typeof code !== 'string') {
throw new Error('Invalid GraphQL code: code must be a non-empty string')
}
// Prevent parsing extremely large documents that could cause performance issues
if (code.length > 100_000) {
throw new Error('GraphQL document too large: maximum 100,000 characters allowed')
}
// Step 1: Parse with tree-sitter
const parser = await getParser()
const tree = parser.parse(code)
if (!tree) {
throw new Error('Tree-sitter failed to parse GraphQL code')
}
// Check if tree-sitter found syntax errors (disabled for now as it may be too strict)
// if (tree.rootNode.hasError) {
// throw new Error('GraphQL syntax error detected by tree-sitter parser')
// }
try {
// Step 2: Walk tree and attach semantics
const tokens = collectTokensWithSemantics(tree, code, schema, annotations)
// Step 3: Add error hint tokens after invalid fields
const tokensWithHints = addErrorHintTokens(tokens, code, annotations)
return tokensWithHints
} finally {
// ## Tree-sitter Resource Lifecycle Management
//
// Tree-sitter creates native WASM objects that must be explicitly freed to prevent memory leaks.
// The tree object holds references to parsed nodes and internal parser state that won't be
// garbage collected automatically by JavaScript.
//
// Critical cleanup points:
// 1. Always call tree.delete() in a finally block to ensure cleanup even on errors
// 2. Do not access tree or any of its nodes after calling delete()
// 3. The parser instance is cached globally and reused across multiple parsing calls
//
// Memory safety: Once tree.delete() is called, all WebTreeSitter.Node references become invalid.
// Our tokens hold references to these nodes, but only use their text and position properties
// which are copied during token creation, so the nodes can be safely deleted.
tree.delete()
}
}
/**
* Get or create the tree-sitter parser instance
*/
async function getParser(): Promise<WebTreeSitter.Parser> {
if (!parserPromise) {
parserPromise = initializeTreeSitter()
}
return parserPromise
}
/**
* Initialize tree-sitter with the GraphQL grammar
*/
async function initializeTreeSitter(): Promise<WebTreeSitter.Parser> {
try {
// Handle different environments
const isNode = typeof process !== 'undefined' && process.versions && process.versions.node
if (isNode) {
// Node.js environment (tests)
const fs = await import('node:fs/promises')
const path = await import('node:path')
// Find the actual WASM files in node_modules
const treeSitterWasmPath = path.join(process.cwd(), 'node_modules/web-tree-sitter/tree-sitter.wasm')
const graphqlWasmPath = path.join(process.cwd(), 'node_modules/tree-sitter-graphql-grammar-wasm/grammar.wasm')
await WebTreeSitter.Parser.init({
locateFile: (filename: string) => {
if (filename === 'tree-sitter.wasm') {
return treeSitterWasmPath
}
return filename
},
})
const parser = new WebTreeSitter.Parser()
const wasmBuffer = await fs.readFile(graphqlWasmPath)
const GraphQL = await WebTreeSitter.Language.load(new Uint8Array(wasmBuffer))
parser.setLanguage(GraphQL)
return parser
} else {
// Browser/Vite environment
await WebTreeSitter.Parser.init({
locateFile: (filename: string) => {
if (filename === 'tree-sitter.wasm') {
return treeSitterWasmUrl
}
return filename
},
})
const parser = new WebTreeSitter.Parser()
// Fetch the WASM file as a buffer
const response = await fetch(graphqlWasmUrl)
if (!response.ok) {
throw new Error(
`Failed to load GraphQL grammar file: ${response.status} ${response.statusText}. `
+ `This may indicate a network issue or missing grammar file.`,
)
}
const wasmBuffer = await response.arrayBuffer()
if (wasmBuffer.byteLength === 0) {
throw new Error('GraphQL grammar file is empty or corrupted')
}
const GraphQL = await WebTreeSitter.Language.load(new Uint8Array(wasmBuffer))
parser.setLanguage(GraphQL)
return parser
}
} catch (error) {
// Enhance error messages for common issues
if (error instanceof Error) {
if (error.message.includes('fetch')) {
throw new Error(`Tree-sitter initialization failed: ${error.message}. Check your network connection.`)
}
if (error.message.includes('Language.load')) {
throw new Error(`Failed to load GraphQL grammar: ${error.message}. The grammar file may be corrupted.`)
}
}
throw error
}
}
/**
* Add error hint tokens after invalid fields
*/
function addErrorHintTokens(
tokens: GraphQLToken[],
code: string,
annotations: CodeAnnotation[],
): GraphQLToken[] {
const tokensWithHints: GraphQLToken[] = []
const processedIndices = new Set<number>()
// Count invalid fields for debugging
let invalidFieldCount = 0
tokens.forEach(t => {
if (t.semantic && 'kind' in t.semantic && t.semantic.kind === 'InvalidField') {
invalidFieldCount++
}
})
if (invalidFieldCount > 10) {
// Too many invalid fields - likely a schema mismatch
// Return tokens without error hints to avoid corrupting display
console.warn(`Polen: ${invalidFieldCount} invalid fields detected. Schema may not match queries.`)
return tokens
}
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i]!
// Skip if we've already processed this token (due to lookahead for arguments)
if (processedIndices.has(i)) {
continue
}
tokensWithHints.push(token)
processedIndices.add(i)
// Check if this is an invalid field
if (token.semantic && 'kind' in token.semantic && token.semantic.kind === 'InvalidField') {
// Look ahead to find where the field ends (after arguments if present)
let fieldEndIndex = i
let j = i + 1
// Skip whitespace
while (j < tokens.length && tokens[j]!.treeSitterNode.type === 'whitespace') {
j++
}
// Check if we have arguments starting with '('
if (j < tokens.length && tokens[j]!.text === '(') {
// Find the matching closing ')'
let parenDepth = 1
j++ // move past the opening '('
while (j < tokens.length && parenDepth > 0) {
const t = tokens[j]!
if (t.text === '(') parenDepth++
else if (t.text === ')') parenDepth--
j++
}
// j is now past the closing ')'
fieldEndIndex = j - 1
// Add all tokens that are part of the field's arguments
for (let k = i + 1; k <= fieldEndIndex; k++) {
if (k < tokens.length && !processedIndices.has(k)) {
tokensWithHints.push(tokens[k]!)
processedIndices.add(k)
}
}
}
// Now add the error hint after the complete field (including arguments)
const lastFieldToken = tokens[fieldEndIndex] || token
const hintText = ' ← No such field'
const hintToken = new UnifiedToken(
createSyntheticNode(
'error_hint',
hintText,
lastFieldToken.end,
lastFieldToken.end + hintText.length,
),
undefined,
annotations,
)
tokensWithHints.push(hintToken)
}
}
return tokensWithHints
}
/**
* Walk tree-sitter AST and collect tokens with semantic information
*/
function collectTokensWithSemantics(
tree: WebTreeSitter.Tree,
code: string,
schema: GraphQLSchema | undefined,
annotations: CodeAnnotation[],
): GraphQLToken[] {
const tokens: GraphQLToken[] = []
const cursor = tree.walk()
const context = schema ? new SemanticContext(schema) : null
let lastEnd = 0
function processNode() {
const node = cursor.currentNode
if (!node) return
// Handle different node types for semantic context
if (context) {
if (node.type === 'operation_definition') {
// Find the operation type child
const operationType = findChildByType(cursor, 'operation_type')
if (operationType) {
context.enterOperation(operationType.text)
}
}
if (node.type === 'fragment_definition') {
// Find the type condition
const typeCondition = findChildByType(cursor, 'type_condition')
if (typeCondition) {
const typeName = findChildByType(cursor, 'named_type', typeCondition)
if (typeName) {
const nameNode = findChildByType(cursor, 'name', typeName)
if (nameNode) {
context.enterFragment(nameNode.text)
}
}
}
}
// We don't need special handling for selection_set anymore
// Context is managed at the field level
}
// Collect leaf tokens with semantic info
// Special case: string_value, int_value, float_value nodes should be collected as whole tokens
// even though they have children (the quotes or signs)
const isValueNode = node.type === 'string_value' || node.type === 'int_value' || node.type === 'float_value'
const shouldCollectToken = (node.childCount === 0 || isValueNode) && node.text.trim() !== ''
if (shouldCollectToken) {
// Add whitespace before this token if needed
if (node.startIndex > lastEnd) {
const whitespace = code.slice(lastEnd, node.startIndex)
tokens.push(
new UnifiedToken(
createWhitespaceNode(whitespace, lastEnd, node.startIndex),
undefined,
annotations,
),
)
}
// Determine semantic info for this token
let semantic: SemanticNode | undefined
if (context && node.type === 'name') {
const parent = cursor.currentNode.parent
if (parent?.type === 'field') {
// This is a field name - get info from current context
const fieldInfo = context.getFieldInfo(node.text)
const currentType = context.getCurrentType()
if (fieldInfo) {
semantic = {
kind: 'OutputField',
parentType: fieldInfo.parentType,
fieldDef: fieldInfo.fieldDef,
}
// Enter this field's context for processing its selection set
context.enterField(node.text)
} else if (currentType) {
// Field doesn't exist - mark as invalid
semantic = {
kind: 'InvalidField',
fieldName: node.text,
parentType: currentType,
}
}
} else if (parent?.type === 'named_type') {
// This is a type reference
const type = context.lookupType(node.text)
if (type) {
// Check if it's one of the types we support as semantic nodes
if (
type instanceof GraphQLObjectType
|| type instanceof GraphQLScalarType
|| type instanceof GraphQLInterfaceType
|| type instanceof GraphQLUnionType
|| type instanceof GraphQLEnumType
|| type instanceof GraphQLInputObjectType
) {
semantic = type
}
}
} else if (parent?.type === 'operation_definition') {
// This is an operation name
semantic = {
kind: 'Operation',
type: context.operationType || 'query',
name: node.text,
}
} else if (parent?.type === 'fragment_definition') {
// This is a fragment name - for now just mark it as a fragment
semantic = {
kind: 'Fragment',
name: node.text,
onType: context.getCurrentType()!, // We'll have the type from enterFragment
}
} else if (parent?.type === 'argument') {
// This is an argument name
//
// ## Complex Argument Parsing Logic
//
// Arguments require complex tree traversal because they appear in the tree-sitter AST
// before the semantic context has been updated for their parent field. This creates
// a chicken-and-egg problem where we need the field to identify the argument, but
// the field hasn't been processed yet.
//
// Tree structure: field > arguments > argument > name
// Processing order: argument names are parsed before field context is established
//
// Our solution is to traverse up the AST to find the field node, then look for that
// field in both the root operation type and the current type context. We check the
// root type first because top-level fields (like Query.pokemon) are most common.
let argumentsNode = parent.parent
if (argumentsNode && argumentsNode.type === 'arguments') {
let fieldNode = argumentsNode.parent
if (fieldNode && fieldNode.type === 'field') {
// Find the field name node within the field node
for (let i = 0; i < fieldNode.childCount; i++) {
const child = fieldNode.child(i)
if (child && child.type === 'name') {
// We need to find the parent type that contains this field
// Start with the root type based on the operation type (query/mutation/subscription)
const rootType = context.schema.getQueryType() || context.schema.getMutationType()
|| context.schema.getSubscriptionType()
if (rootType) {
// First check if the field exists on the root type (most common case)
let field = rootType.getFields()[child.text]
let parentType: GraphQLObjectType | GraphQLInterfaceType = rootType
// If not found on root, check the current type in our semantic context
// This handles nested field arguments like User.posts(limit: 10)
if (!field) {
const currentType = context.getCurrentType()
if (currentType) {
field = currentType.getFields()[child.text]
parentType = currentType
}
}
if (field && parentType) {
const arg = field.args.find((a: GraphQLArgument) => a.name === node.text)
if (arg) {
semantic = {
kind: 'Argument',
parentType: parentType,
parentField: field,
argumentDef: arg,
}
}
}
}
break
}
}
}
}
} else if (parent?.type === 'variable') {
// This is a variable name (without the $)
semantic = {
kind: 'Variable',
name: node.text,
}
} else if (parent?.type === 'variable_definition') {
// This is a variable definition in the operation header
semantic = {
kind: 'Variable',
name: node.text,
}
}
} else if (context && node.type === 'variable' && node.text.startsWith('$')) {
// This is the full variable including $ (usage in arguments or directives)
semantic = {
kind: 'Variable',
name: node.text.slice(1),
}
}
const token = new UnifiedToken(
node,
semantic,
annotations,
)
tokens.push(token)
lastEnd = node.endIndex
}
// Traverse children (but skip children of value nodes since we collect them as whole tokens)
if (!isValueNode && cursor.gotoFirstChild()) {
do {
processNode()
} while (cursor.gotoNextSibling())
cursor.gotoParent()
// Handle context exit
if (context && node.type === 'field') {
// Only exit field context if this field has a selection set
// (meaning it's an object/interface type that pushed to the stack)
const hasSelectionSet = node.childCount > 0 && node.children.some(
child => child?.type === 'selection_set',
)
if (hasSelectionSet) {
context.exitField()
}
} else if (context && (node.type === 'operation_definition' || node.type === 'fragment_definition')) {
// Reset context when exiting operation or fragment
context.reset()
}
}
}
processNode()
// Add final whitespace if needed
if (lastEnd < code.length) {
const remaining = code.slice(lastEnd)
tokens.push(
new UnifiedToken(
createWhitespaceNode(remaining, lastEnd, code.length),
undefined,
annotations,
),
)
}
return tokens
}
/**
* Helper to find a child node by type
*/
function findChildByType(
cursor: WebTreeSitter.TreeCursor,
type: string,
node?: WebTreeSitter.Node,
): WebTreeSitter.Node | null {
const targetNode = node || cursor.currentNode
if (!targetNode) return null
for (let i = 0; i < targetNode.childCount; i++) {
const child = targetNode.child(i)
if (child && child.type === type) {
return child
}
}
return null
}
/**
* Create a pseudo tree-sitter node for whitespace
*/
function createWhitespaceNode(text: string, start: number, end: number): WebTreeSitter.Node {
// Create a synthetic node with proper getter interface
const node = new SyntheticNode('whitespace', text, start, end)
return node as unknown as WebTreeSitter.Node
}
/**
* Create a pseudo tree-sitter node for synthetic content
*
* ## Annotation Architecture
*
* Polen uses synthetic tree-sitter nodes to inject additional content into GraphQL code blocks.
* This approach was chosen after considering several alternatives:
*
* ### Current Approach: Synthetic Nodes
* We create fake tree-sitter nodes that implement just enough of the Node interface to flow
* through our token rendering pipeline. This is used for error hints that appear after invalid fields.
*
* **When to use**: When you need to add new content to the code block (not just style existing content)
*
* ### Alternative Approaches Considered:
*
* 1. **CodeHike Annotations**
* - Use CodeHike's built-in InlineAnnotation/BlockAnnotation system
* - Pros: Works with CodeHike's architecture, composable with other handlers
* - Cons: More complex, requires understanding CodeHike's annotation pipeline
* - Best for: Complex features like collapsible sections, tabs
*
* 2. **Post-Processing During Render**
* - Keep tokens unchanged, add content during the rendering phase
* - Pros: Simpler, no fake nodes needed
* - Cons: Rendering logic becomes more complex, harder to test
* - Best for: Simple conditional content
*
* 3. **Token Metadata/Props**
* - Add annotation data to token properties rather than creating new tokens
* - Pros: Clean data model, easy to test
* - Cons: Can't add new content, only modify existing tokens
* - Best for: Styling annotations (highlights, emphasis, underlines)
*
* ### Guidelines for Future Annotations:
*
* - **Styling only** (highlights, emphasis): Use token metadata/props
* - **Adding content** (error hints, tooltips): Use synthetic nodes (current approach)
* - **Complex UI** (collapsible, tabs): Consider CodeHike annotation handlers
* - **User-defined annotations**: Choose based on what the annotation does
*
* The synthetic node approach works well for Polen's error hints because we're actually
* inserting new text content ("← No such field") that needs to be positioned and styled
* like a regular token.
*/
function createSyntheticNode(
type: TreeSitterGraphQLNodeType,
text: string,
start: number,
end: number,
): WebTreeSitter.Node {
// Create a synthetic node with proper getter interface
const node = new SyntheticNode(type, text, start, end)
return node as unknown as WebTreeSitter.Node
}