feat(ext): draft extension structure (single-page mode)

This commit is contained in:
Simon
2026-01-20 17:36:33 +08:00
parent e0027f99c4
commit db3b24a5ea
37 changed files with 6334 additions and 4 deletions

10
.gitignore vendored
View File

@@ -25,11 +25,15 @@ dist-ssr
*.sw? *.sw?
.qoder .qoder
# env files
#
.env .env
.env.*
# Copied files for npm publish (generated by prepublishOnly) # Copied files for npm publish (generated by prepublishOnly)
packages/page-agent/README.md packages/page-agent/README.md
packages/page-agent/README-zh.md packages/page-agent/README-zh.md
packages/page-agent/LICENSE packages/page-agent/LICENSE
# extension
.output
.wxt

View File

@@ -8,7 +8,13 @@ import globals from 'globals'
import tseslint from 'typescript-eslint' import tseslint from 'typescript-eslint'
export default defineConfig([ export default defineConfig([
globalIgnores(['**/dist', '**/node_modules', 'packages/website/src/components/ui']), globalIgnores([
'**/dist',
'**/node_modules',
'packages/*/src/components/ui',
'**/.wxt',
'**/.output',
]),
{ {
plugins: { plugins: {
'react-hooks': reactHooks, 'react-hooks': reactHooks,
@@ -69,6 +75,8 @@ export default defineConfig([
'react-dom/no-missing-button-type': 'off', 'react-dom/no-missing-button-type': 'off',
'react-x/no-nested-component-definitions': 'off', 'react-x/no-nested-component-definitions': 'off',
'@typescript-eslint/prefer-optional-chain': 'off', '@typescript-eslint/prefer-optional-chain': 'off',
'@typescript-eslint/use-unknown-in-catch-callback-variable': 'warn',
'@typescript-eslint/no-unnecessary-type-parameters': 'off',
// 'require-await': 'off', // 'require-await': 'off',
'@typescript-eslint/require-await': 'off', '@typescript-eslint/require-await': 'off',

3531
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,7 @@
"packages/llms", "packages/llms",
"packages/core", "packages/core",
"packages/page-agent", "packages/page-agent",
"packages/extension",
"packages/website" "packages/website"
], ],
"description": "AI-powered UI agent for web applications", "description": "AI-powered UI agent for web applications",

View File

@@ -0,0 +1,2 @@
.wxt
src/components/ui

View File

@@ -0,0 +1,24 @@
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "new-york",
"rsc": false,
"tsx": true,
"tailwind": {
"config": "",
"css": "src/index.css",
"baseColor": "neutral",
"cssVariables": true,
"prefix": ""
},
"iconLibrary": "lucide",
"aliases": {
"components": "@/components",
"utils": "@/lib/utils",
"ui": "@/components/ui",
"lib": "@/lib",
"hooks": "@/hooks"
},
"registries": {
"@magicui": "https://magicui.design/r/{name}.json"
}
}

View File

@@ -0,0 +1,55 @@
{
"name": "@page-agent/ext",
"private": true,
"version": "1.0.0-beta.1",
"type": "module",
"scripts": {
"dev": "wxt",
"dev:firefox": "wxt -b firefox",
"build": "wxt build",
"build:firefox": "wxt build -b firefox",
"zip": "wxt zip",
"zip:firefox": "wxt zip -b firefox",
"postinstall": "wxt prepare"
},
"devDependencies": {
"@tailwindcss/vite": "^4.1.18",
"@types/chrome": "^0.1.34",
"@types/react": "^19.2.8",
"@types/react-dom": "^19.2.1",
"@vitejs/plugin-react-swc": "^4.1.0",
"@wxt-dev/module-react": "^1.1.5",
"i18next": "^25.7.4",
"i18next-browser-languagedetector": "^8.2.0",
"react": "^19.2.3",
"react-dom": "^19.2.3",
"react-i18next": "^16.5.2",
"tailwindcss": "^4.1.14",
"tw-animate-css": "^1.4.0",
"wouter": "^3.9.0",
"wxt": "^0.20.13"
},
"dependencies": {
"@page-agent/core": "^1.0.0-beta.1",
"@page-agent/llms": "^1.0.0-beta.1",
"@page-agent/page-controller": "^1.0.0-beta.1",
"@page-agent/ui": "^1.0.0-beta.1",
"@radix-ui/react-hover-card": "^1.1.15",
"@radix-ui/react-icons": "^1.3.2",
"@radix-ui/react-label": "^2.1.8",
"@radix-ui/react-separator": "^1.1.8",
"@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-switch": "^1.2.6",
"@radix-ui/react-tooltip": "^1.2.8",
"@webext-core/messaging": "^2.3.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^0.562.0",
"motion": "^12.26.1",
"next-themes": "^0.4.6",
"rough-notation": "^0.5.1",
"simple-icons": "^16.5.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.4.0"
}
}

View File

@@ -0,0 +1,131 @@
/**
* RemotePageController - Proxy for PageController in ContentScript
*
* This class implements the same interface as PageController but forwards
* all method calls via RPC to the real PageController running in ContentScript.
* This allows PageAgentCore to work transparently with remote DOM operations.
*/
import type {
ActionResult,
BrowserState,
ScrollHorizontallyOptions,
ScrollOptions,
} from '../messaging/protocol'
import { rpcClient } from '../messaging/rpc'
/**
* RemotePageController is a proxy that implements the PageController interface.
* All methods are async and forward to ContentScript via RPC.
*
* This class extends EventTarget to maintain API compatibility with PageController,
* though events in the remote context are not currently bridged.
*/
export class RemotePageController extends EventTarget {
  // ======= State Queries =======

  /**
   * Ask the remote side for the current page URL.
   * @returns the value produced by `rpcClient.getCurrentUrl()`
   */
  async getCurrentUrl(): Promise<string> {
    return await rpcClient.getCurrentUrl()
  }

  /**
   * Ask the remote side for the timestamp of the last tree update.
   * @returns the value produced by `rpcClient.getLastUpdateTime()`
   */
  async getLastUpdateTime(): Promise<number> {
    return await rpcClient.getLastUpdateTime()
  }

  /**
   * Fetch the structured browser state (intended for LLM consumption).
   * @returns the value produced by `rpcClient.getBrowserState()`
   */
  async getBrowserState(): Promise<BrowserState> {
    return await rpcClient.getBrowserState()
  }

  // ======= DOM Tree Operations =======

  /**
   * Request a DOM tree update.
   * @returns the simplified HTML string produced by `rpcClient.updateTree()`
   */
  async updateTree(): Promise<string> {
    return await rpcClient.updateTree()
  }

  /**
   * Request removal of all element highlights.
   */
  async cleanUpHighlights(): Promise<void> {
    return await rpcClient.cleanUpHighlights()
  }

  // ======= Element Actions =======

  /**
   * Click the element identified by `index`.
   * @param index element index forwarded unchanged to the RPC call
   */
  async clickElement(index: number): Promise<ActionResult> {
    return await rpcClient.clickElement(index)
  }

  /**
   * Type `text` into the element identified by `index`.
   * @param index element index forwarded unchanged to the RPC call
   * @param text text forwarded unchanged to the RPC call
   */
  async inputText(index: number, text: string): Promise<ActionResult> {
    return await rpcClient.inputText(index, text)
  }

  /**
   * Choose a dropdown option by the element's index and the option's text.
   * @param index element index forwarded unchanged to the RPC call
   * @param optionText option text forwarded unchanged to the RPC call
   */
  async selectOption(index: number, optionText: string): Promise<ActionResult> {
    return await rpcClient.selectOption(index, optionText)
  }

  /**
   * Scroll vertically.
   * @param options scroll options forwarded unchanged to the RPC call
   */
  async scroll(options: ScrollOptions): Promise<ActionResult> {
    return await rpcClient.scroll(options)
  }

  /**
   * Scroll horizontally.
   * @param options scroll options forwarded unchanged to the RPC call
   */
  async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
    return await rpcClient.scrollHorizontally(options)
  }

  /**
   * Run the given JavaScript source on the page.
   * @param script script text forwarded unchanged to the RPC call
   */
  async executeJavascript(script: string): Promise<ActionResult> {
    return await rpcClient.executeJavascript(script)
  }

  // ======= Mask Operations =======

  /**
   * Ask the remote side to show the visual mask overlay.
   */
  async showMask(): Promise<void> {
    return await rpcClient.showMask()
  }

  /**
   * Ask the remote side to hide the visual mask overlay.
   */
  async hideMask(): Promise<void> {
    return await rpcClient.hideMask()
  }

  /**
   * Fire-and-forget disposal: the RPC call is started but not awaited, and a
   * rejection is deliberately discarded so that dispose() itself never fails.
   */
  dispose(): void {
    const ignoreRejection = (): void => {
      // Ignore errors on dispose
    }
    rpcClient.dispose().catch(ignoreRejection)
  }
}

View File

@@ -0,0 +1,123 @@
@import 'tailwindcss';
@import 'tw-animate-css';
@custom-variant dark (&:is(.dark *));
/*
 * Default (light) values of the design tokens consumed by the UI components.
 * Colors are written with oklch(); --radius is the base corner radius that the
 * --radius-sm/md/lg/xl theme sizes further down this file are calculated from.
 * The .dark rule later in this file overrides every token here except --radius.
 *
 * NOTE(review): --destructive-foreground has exactly the same value as
 * --destructive in this block, so using the pair as text-on-background would
 * give no contrast — confirm this is intended.
 */
:root {
--background: oklch(1 0 0);
--foreground: oklch(0.145 0 0);
--card: oklch(1 0 0);
--card-foreground: oklch(0.145 0 0);
--popover: oklch(1 0 0);
--popover-foreground: oklch(0.145 0 0);
--primary: oklch(0.205 0 0);
--primary-foreground: oklch(0.985 0 0);
--secondary: oklch(0.97 0 0);
--secondary-foreground: oklch(0.205 0 0);
--muted: oklch(0.97 0 0);
--muted-foreground: oklch(0.556 0 0);
--accent: oklch(0.97 0 0);
--accent-foreground: oklch(0.205 0 0);
--destructive: oklch(0.577 0.245 27.325);
--destructive-foreground: oklch(0.577 0.245 27.325);
--border: oklch(0.922 0 0);
--input: oklch(0.922 0 0);
--ring: oklch(0.708 0 0);
--chart-1: oklch(0.646 0.222 41.116);
--chart-2: oklch(0.6 0.118 184.704);
--chart-3: oklch(0.398 0.07 227.392);
--chart-4: oklch(0.828 0.189 84.429);
--chart-5: oklch(0.769 0.188 70.08);
--radius: 0.625rem;
--sidebar: oklch(0.985 0 0);
--sidebar-foreground: oklch(0.145 0 0);
--sidebar-primary: oklch(0.205 0 0);
--sidebar-primary-foreground: oklch(0.985 0 0);
--sidebar-accent: oklch(0.97 0 0);
--sidebar-accent-foreground: oklch(0.205 0 0);
--sidebar-border: oklch(0.922 0 0);
--sidebar-ring: oklch(0.708 0 0);
}
/*
 * Dark-theme token values. They are set on any element carrying the `dark`
 * class and replace the :root defaults for that element and, through custom
 * property inheritance, its descendants. --radius is not redefined here, so
 * the :root value stays in effect.
 */
.dark {
--background: oklch(0.145 0 0);
--foreground: oklch(0.985 0 0);
--card: oklch(0.145 0 0);
--card-foreground: oklch(0.985 0 0);
--popover: oklch(0.145 0 0);
--popover-foreground: oklch(0.985 0 0);
--primary: oklch(0.985 0 0);
--primary-foreground: oklch(0.205 0 0);
--secondary: oklch(0.269 0 0);
--secondary-foreground: oklch(0.985 0 0);
--muted: oklch(0.269 0 0);
--muted-foreground: oklch(0.708 0 0);
--accent: oklch(0.269 0 0);
--accent-foreground: oklch(0.985 0 0);
--destructive: oklch(0.396 0.141 25.723);
--destructive-foreground: oklch(0.637 0.237 25.331);
--border: oklch(0.269 0 0);
--input: oklch(0.269 0 0);
--ring: oklch(0.439 0 0);
--chart-1: oklch(0.488 0.243 264.376);
--chart-2: oklch(0.696 0.17 162.48);
--chart-3: oklch(0.769 0.188 70.08);
--chart-4: oklch(0.627 0.265 303.9);
--chart-5: oklch(0.645 0.246 16.439);
--sidebar: oklch(0.205 0 0);
--sidebar-foreground: oklch(0.985 0 0);
--sidebar-primary: oklch(0.488 0.243 264.376);
--sidebar-primary-foreground: oklch(0.985 0 0);
--sidebar-accent: oklch(0.269 0 0);
--sidebar-accent-foreground: oklch(0.985 0 0);
--sidebar-border: oklch(0.269 0 0);
--sidebar-ring: oklch(0.439 0 0);
}
/*
 * Maps the raw tokens defined in :root / .dark onto Tailwind theme variables:
 * every --color-* entry points at the token of the same name, and the
 * --radius-* sizes are computed from --radius (sm = -4px, md = -2px,
 * lg = base, xl = +4px).
 * These names are what utilities used elsewhere in this package refer to
 * (e.g. bg-background, text-foreground, border-border, rounded-md).
 */
@theme inline {
--color-background: var(--background);
--color-foreground: var(--foreground);
--color-card: var(--card);
--color-card-foreground: var(--card-foreground);
--color-popover: var(--popover);
--color-popover-foreground: var(--popover-foreground);
--color-primary: var(--primary);
--color-primary-foreground: var(--primary-foreground);
--color-secondary: var(--secondary);
--color-secondary-foreground: var(--secondary-foreground);
--color-muted: var(--muted);
--color-muted-foreground: var(--muted-foreground);
--color-accent: var(--accent);
--color-accent-foreground: var(--accent-foreground);
--color-destructive: var(--destructive);
--color-destructive-foreground: var(--destructive-foreground);
--color-border: var(--border);
--color-input: var(--input);
--color-ring: var(--ring);
--color-chart-1: var(--chart-1);
--color-chart-2: var(--chart-2);
--color-chart-3: var(--chart-3);
--color-chart-4: var(--chart-4);
--color-chart-5: var(--chart-5);
--radius-sm: calc(var(--radius) - 4px);
--radius-md: calc(var(--radius) - 2px);
--radius-lg: var(--radius);
--radius-xl: calc(var(--radius) + 4px);
--color-sidebar: var(--sidebar);
--color-sidebar-foreground: var(--sidebar-foreground);
--color-sidebar-primary: var(--sidebar-primary);
--color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
--color-sidebar-accent: var(--sidebar-accent);
--color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
--color-sidebar-border: var(--sidebar-border);
--color-sidebar-ring: var(--sidebar-ring);
}
/*
 * Global defaults placed in the `base` layer:
 *  - every element gets the `border` token as border color and the `ring`
 *    token at 50% opacity as outline color;
 *  - <body> uses the background / foreground tokens for its colors.
 */
@layer base {
* {
@apply border-border outline-ring/50;
}
body {
@apply bg-background text-foreground;
}
}

View File

@@ -0,0 +1,60 @@
import { Slot } from '@radix-ui/react-slot'
import { type VariantProps, cva } from 'class-variance-authority'
import * as React from 'react'
import { cn } from '@/lib/utils'
/** Classes applied to every button regardless of variant or size. */
const buttonBaseClasses =
  "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4 shrink-0 [&_svg]:shrink-0 outline-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive"

/**
 * Class-name builder for buttons.
 *
 * Two variant axes are available: `variant` (visual treatment) and `size`
 * (dimensions / padding). Both fall back to `'default'` when omitted.
 */
const buttonVariants = cva(buttonBaseClasses, {
  variants: {
    // Visual treatment
    variant: {
      default: 'bg-primary text-primary-foreground hover:bg-primary/90',
      destructive:
        'bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60',
      outline:
        'border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50',
      secondary: 'bg-secondary text-secondary-foreground hover:bg-secondary/80',
      ghost: 'hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50',
      link: 'text-primary underline-offset-4 hover:underline',
    },
    // Dimensions; the `icon*` sizes are square
    size: {
      default: 'h-9 px-4 py-2 has-[>svg]:px-3',
      sm: 'h-8 rounded-md gap-1.5 px-3 has-[>svg]:px-2.5',
      lg: 'h-10 rounded-md px-6 has-[>svg]:px-4',
      icon: 'size-9',
      'icon-sm': 'size-8',
      'icon-lg': 'size-10',
    },
  },
  defaultVariants: {
    variant: 'default',
    size: 'default',
  },
})
/**
 * Button component.
 *
 * - `variant` / `size` pick the classes from `buttonVariants` (both default to
 *   `'default'`) and are also mirrored into `data-variant` / `data-size`.
 * - `asChild` (default `false`): when true the Radix `Slot` is rendered instead
 *   of a `<button>` element.
 * - All remaining props are spread last, so they can override the attributes
 *   set here.
 */
function Button(
  buttonProps: React.ComponentProps<'button'> &
    VariantProps<typeof buttonVariants> & {
      asChild?: boolean
    }
) {
  const {
    className,
    variant = 'default',
    size = 'default',
    asChild = false,
    ...rest
  } = buttonProps

  const Root = asChild ? Slot : 'button'
  const classes = cn(buttonVariants({ variant, size, className }))

  return (
    <Root
      data-slot="button"
      data-variant={variant}
      data-size={size}
      className={classes}
      {...rest}
    />
  )
}
export { Button, buttonVariants }

View File

@@ -0,0 +1,75 @@
import * as React from 'react'
import { cn } from '@/lib/utils'
/**
 * Card container: a `<div data-slot="card">` with the card surface classes.
 * The caller's `className` is merged in through `cn`; other props are spread
 * last onto the element.
 */
function Card(cardProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = cardProps
  const classes = cn(
    'bg-card text-card-foreground flex flex-col gap-6 rounded-xl border py-6 shadow-sm',
    className
  )
  return <div data-slot="card" className={classes} {...rest} />
}
/**
 * Header area of a card: a grid `<div data-slot="card-header">`. Its classes
 * switch to a two-column layout when it contains an element with
 * `data-slot="card-action"`.
 */
function CardHeader(headerProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = headerProps
  const classes = cn(
    '@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-2 px-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6',
    className
  )
  return <div data-slot="card-header" className={classes} {...rest} />
}
/**
 * Card title: a `<div data-slot="card-title">` with semibold, tight-leading
 * text classes merged with the caller's `className`.
 */
function CardTitle(titleProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = titleProps
  const classes = cn('leading-none font-semibold', className)
  return <div data-slot="card-title" className={classes} {...rest} />
}
/**
 * Card description: a `<div data-slot="card-description">` rendered in small,
 * muted text.
 */
function CardDescription(descriptionProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = descriptionProps
  const classes = cn('text-muted-foreground text-sm', className)
  return <div data-slot="card-description" className={classes} {...rest} />
}
/**
 * Action slot of a card header: a `<div data-slot="card-action">` placed in
 * grid column 2, spanning two rows and aligned to the top/end.
 */
function CardAction(actionProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = actionProps
  const classes = cn('col-start-2 row-span-2 row-start-1 self-start justify-self-end', className)
  return <div data-slot="card-action" className={classes} {...rest} />
}
/** Card body: a horizontally padded `<div data-slot="card-content">`. */
function CardContent(contentProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = contentProps
  const classes = cn('px-6', className)
  return <div data-slot="card-content" className={classes} {...rest} />
}
/**
 * Card footer: a flex-row `<div data-slot="card-footer">`; gets extra top
 * padding when it also carries the `border-t` class.
 */
function CardFooter(footerProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = footerProps
  const classes = cn('flex items-center px-6 [.border-t]:pt-6', className)
  return <div data-slot="card-footer" className={classes} {...rest} />
}
export { Card, CardHeader, CardFooter, CardTitle, CardAction, CardDescription, CardContent }

View File

@@ -0,0 +1,232 @@
import { type VariantProps, cva } from 'class-variance-authority'
import { useMemo } from 'react'
import { Label } from '@/components/ui/label'
import { Separator } from '@/components/ui/separator'
import { cn } from '@/lib/utils'
/**
 * Groups related fields in a `<fieldset data-slot="field-set">` laid out as a
 * vertical flex column. The gap shrinks when a checkbox-group or radio-group
 * slot is a direct child.
 */
function FieldSet(fieldSetProps: React.ComponentProps<'fieldset'>) {
  const { className, ...rest } = fieldSetProps
  const classes = cn(
    'flex flex-col gap-6',
    'has-[>[data-slot=checkbox-group]]:gap-3 has-[>[data-slot=radio-group]]:gap-3',
    className
  )
  return <fieldset data-slot="field-set" className={classes} {...rest} />
}
/**
 * Legend for a field set.
 *
 * `variant` (default `'legend'`) is written to `data-variant`, which the
 * classes use to choose the text size: base size for `'legend'`, small for
 * `'label'`.
 */
function FieldLegend(
  legendProps: React.ComponentProps<'legend'> & { variant?: 'legend' | 'label' }
) {
  const { className, variant = 'legend', ...rest } = legendProps
  const classes = cn(
    'mb-3 font-medium',
    'data-[variant=legend]:text-base',
    'data-[variant=label]:text-sm',
    className
  )
  return <legend data-slot="field-legend" data-variant={variant} className={classes} {...rest} />
}
/**
 * Vertical stack of fields: a full-width `<div data-slot="field-group">` that
 * also declares the `field-group` group and container names referenced by the
 * field classes in this file.
 */
function FieldGroup(groupProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = groupProps
  const classes = cn(
    'group/field-group @container/field-group flex w-full flex-col gap-7 data-[slot=checkbox-group]:gap-3 [&>[data-slot=field-group]]:gap-4',
    className
  )
  return <div data-slot="field-group" className={classes} {...rest} />
}
/** Classes shared by every field orientation. */
const fieldBaseClasses = 'group/field flex w-full gap-3 data-[invalid=true]:text-destructive'

/**
 * Class-name builder for `Field`. The single `orientation` axis selects a
 * stacked (`vertical`, the default), side-by-side (`horizontal`) or
 * container-width dependent (`responsive`) layout.
 */
const fieldVariants = cva(fieldBaseClasses, {
  variants: {
    orientation: {
      vertical: ['flex-col [&>*]:w-full [&>.sr-only]:w-auto'],
      horizontal: [
        'flex-row items-center',
        '[&>[data-slot=field-label]]:flex-auto',
        'has-[>[data-slot=field-content]]:items-start has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px',
      ],
      // Stacked by default, switches to a row at the @md container breakpoint
      responsive: [
        'flex-col [&>*]:w-full [&>.sr-only]:w-auto @md/field-group:flex-row @md/field-group:items-center @md/field-group:[&>*]:w-auto',
        '@md/field-group:[&>[data-slot=field-label]]:flex-auto',
        '@md/field-group:has-[>[data-slot=field-content]]:items-start @md/field-group:has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px',
      ],
    },
  },
  defaultVariants: {
    orientation: 'vertical',
  },
})
/**
 * A single form field wrapper: `<div role="group" data-slot="field">`.
 *
 * `orientation` (default `'vertical'`) selects the layout classes from
 * `fieldVariants` and is mirrored into `data-orientation`.
 */
function Field(fieldProps: React.ComponentProps<'div'> & VariantProps<typeof fieldVariants>) {
  const { className, orientation = 'vertical', ...rest } = fieldProps
  const classes = cn(fieldVariants({ orientation }), className)
  return (
    <div
      role="group"
      data-slot="field"
      data-orientation={orientation}
      className={classes}
      {...rest}
    />
  )
}
/**
 * Column wrapper for a field's textual content:
 * `<div data-slot="field-content">` that grows to fill the remaining space.
 */
function FieldContent(contentProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = contentProps
  const classes = cn('group/field-content flex flex-1 flex-col gap-1.5 leading-snug', className)
  return <div data-slot="field-content" className={classes} {...rest} />
}
/**
 * Label for a field, built on the shared `Label` component with
 * `data-slot="field-label"`.
 *
 * The classes dim the label when the surrounding field has
 * `data-disabled="true"`, turn it into a bordered block when it directly wraps
 * a `data-slot="field"` element, and tint it when it contains an element with
 * `data-state="checked"`.
 */
function FieldLabel(labelProps: React.ComponentProps<typeof Label>) {
  const { className, ...rest } = labelProps
  const classes = cn(
    'group/field-label peer/field-label flex w-fit gap-2 leading-snug group-data-[disabled=true]/field:opacity-50',
    'has-[>[data-slot=field]]:w-full has-[>[data-slot=field]]:flex-col has-[>[data-slot=field]]:rounded-md has-[>[data-slot=field]]:border [&>*]:data-[slot=field]:p-4',
    'has-data-[state=checked]:bg-primary/5 has-data-[state=checked]:border-primary dark:has-data-[state=checked]:bg-primary/10',
    className
  )
  return <Label data-slot="field-label" className={classes} {...rest} />
}
/**
 * Non-label title for a field, rendered as a plain `<div>`.
 * Note that it uses the same `data-slot="field-label"` value as `FieldLabel`.
 */
function FieldTitle(titleProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = titleProps
  const classes = cn(
    'flex w-fit items-center gap-2 text-sm leading-snug font-medium group-data-[disabled=true]/field:opacity-50',
    className
  )
  return <div data-slot="field-label" className={classes} {...rest} />
}
/**
 * Helper text for a field: a muted, small `<p data-slot="field-description">`.
 * Direct child links are underlined and take the primary color on hover.
 */
function FieldDescription(descriptionProps: React.ComponentProps<'p'>) {
  const { className, ...rest } = descriptionProps
  const classes = cn(
    'text-muted-foreground text-sm leading-normal font-normal group-has-[[data-orientation=horizontal]]/field:text-balance',
    // Margin tweaks that depend on position among siblings
    'last:mt-0 nth-last-2:-mt-1 [[data-variant=legend]+&]:-mt-1.5',
    // Link styling
    '[&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4',
    className
  )
  return <p data-slot="field-description" className={classes} {...rest} />
}
/**
 * Horizontal divider between fields, optionally with centered content.
 *
 * A `Separator` line is always drawn through the vertical middle. When
 * `children` is truthy it is rendered on top of the line inside a
 * background-colored `<span data-slot="field-separator-content">`.
 * `data-content` reflects whether `children` is truthy.
 */
function FieldSeparator(
  separatorProps: React.ComponentProps<'div'> & {
    children?: React.ReactNode
  }
) {
  const { children, className, ...rest } = separatorProps
  const hasContent = !!children
  const classes = cn(
    'relative -my-2 h-5 text-sm group-data-[variant=outline]/field-group:-mb-2',
    className
  )
  // Same short-circuit as rendering `children && <span/>` inline
  const label = children && (
    <span
      className="bg-background text-muted-foreground relative mx-auto block w-fit px-2"
      data-slot="field-separator-content"
    >
      {children}
    </span>
  )

  return (
    <div data-slot="field-separator" data-content={hasContent} className={classes} {...rest}>
      <Separator className="absolute inset-0 top-1/2" />
      {label}
    </div>
  )
}
/**
 * Validation feedback for a field, rendered as
 * `<div role="alert" data-slot="field-error">`.
 *
 * Content is chosen in this order:
 * 1. `children`, when truthy, is rendered as-is.
 * 2. Otherwise the non-empty `message` strings found in `errors` are
 *    de-duplicated (first occurrence wins, original order kept):
 *    - no message  -> the component renders nothing;
 *    - one message -> rendered as plain text;
 *    - several     -> rendered as a bulleted list.
 *
 * Entries that are `undefined` or have no / an empty `message` are ignored, so
 * they can neither turn a single message into a list nor produce an empty
 * list inside the alert.
 *
 * @param errors optional list of error-like objects; only `message` is read
 * @returns the alert element, or `null` when there is nothing to show
 */
function FieldError({
  className,
  children,
  errors,
  ...props
}: React.ComponentProps<'div'> & {
  errors?: Array<{ message?: string } | undefined>
}) {
  const content = useMemo(() => {
    if (children) {
      return children
    }

    // Collect usable messages only; Set keeps insertion order, so the first
    // occurrence of each message determines its position.
    const messages = [
      ...new Set(
        (errors ?? [])
          .map((error) => error?.message)
          .filter((message): message is string => !!message)
      ),
    ]

    if (messages.length === 0) {
      return null
    }
    if (messages.length === 1) {
      return messages[0]
    }

    return (
      <ul className="ml-4 flex list-disc flex-col gap-1">
        {/* Messages are unique after de-duplication, so they are stable keys */}
        {messages.map((message) => (
          <li key={message}>{message}</li>
        ))}
      </ul>
    )
  }, [children, errors])

  if (!content) {
    return null
  }

  return (
    <div
      role="alert"
      data-slot="field-error"
      className={cn('text-destructive text-sm font-normal', className)}
      {...props}
    >
      {content}
    </div>
  )
}
export {
Field,
FieldLabel,
FieldDescription,
FieldError,
FieldGroup,
FieldLegend,
FieldSeparator,
FieldSet,
FieldContent,
FieldTitle,
}

View File

@@ -0,0 +1,36 @@
import * as HoverCardPrimitive from '@radix-ui/react-hover-card'
import * as React from 'react'
import { cn } from '@/lib/utils'
/** Hover-card root: forwards all props to the Radix `Root` with `data-slot="hover-card"`. */
function HoverCard(rootProps: React.ComponentProps<typeof HoverCardPrimitive.Root>) {
  return <HoverCardPrimitive.Root data-slot="hover-card" {...rootProps} />
}
/** Hover-card trigger: forwards all props to the Radix `Trigger` with `data-slot="hover-card-trigger"`. */
function HoverCardTrigger(triggerProps: React.ComponentProps<typeof HoverCardPrimitive.Trigger>) {
  return <HoverCardPrimitive.Trigger data-slot="hover-card-trigger" {...triggerProps} />
}
/**
 * Hover-card popup content, rendered inside the Radix `Portal`.
 *
 * `align` defaults to `'center'` and `sideOffset` to `4`; both are passed to
 * the Radix `Content`. Remaining props are spread last onto `Content`.
 */
function HoverCardContent(contentProps: React.ComponentProps<typeof HoverCardPrimitive.Content>) {
  const { className, align = 'center', sideOffset = 4, ...rest } = contentProps
  const classes = cn(
    'bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-64 origin-(--radix-hover-card-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden',
    className
  )

  return (
    <HoverCardPrimitive.Portal data-slot="hover-card-portal">
      <HoverCardPrimitive.Content
        data-slot="hover-card-content"
        align={align}
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </HoverCardPrimitive.Portal>
  )
}
export { HoverCard, HoverCardTrigger, HoverCardContent }

View File

@@ -0,0 +1,156 @@
import { type VariantProps, cva } from 'class-variance-authority'
import * as React from 'react'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { Textarea } from '@/components/ui/textarea'
import { cn } from '@/lib/utils'
/**
 * Bordered wrapper that visually joins a control with its addons:
 * `<div data-slot="input-group" role="group">`.
 *
 * The classes react to the children: padding/layout changes depending on the
 * `data-align` of direct-child addons, and focus / invalid styling is driven by
 * descendants marked `data-slot="input-group-control"` or `aria-invalid="true"`.
 */
function InputGroup(groupProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = groupProps
  const classes = cn(
    'group/input-group border-input dark:bg-input/30 relative flex w-full items-center rounded-md border shadow-xs transition-[color,box-shadow] outline-none',
    'h-9 min-w-0 has-[>textarea]:h-auto',
    // Layout adjustments per addon alignment
    'has-[>[data-align=inline-start]]:[&>input]:pl-2',
    'has-[>[data-align=inline-end]]:[&>input]:pr-2',
    'has-[>[data-align=block-start]]:h-auto has-[>[data-align=block-start]]:flex-col has-[>[data-align=block-start]]:[&>input]:pb-3',
    'has-[>[data-align=block-end]]:h-auto has-[>[data-align=block-end]]:flex-col has-[>[data-align=block-end]]:[&>input]:pt-3',
    // Ring while the inner control has keyboard focus
    'has-[[data-slot=input-group-control]:focus-visible]:border-ring has-[[data-slot=input-group-control]:focus-visible]:ring-ring/50 has-[[data-slot=input-group-control]:focus-visible]:ring-[3px]',
    // Destructive ring/border while a slotted descendant is invalid
    'has-[[data-slot][aria-invalid=true]]:ring-destructive/20 has-[[data-slot][aria-invalid=true]]:border-destructive dark:has-[[data-slot][aria-invalid=true]]:ring-destructive/40',
    className
  )
  return <div data-slot="input-group" role="group" className={classes} {...rest} />
}
/** Classes shared by every addon alignment. */
const inputGroupAddonBaseClasses =
  "text-muted-foreground flex h-auto cursor-text items-center justify-center gap-2 py-1.5 text-sm font-medium select-none [&>svg:not([class*='size-'])]:size-4 [&>kbd]:rounded-[calc(var(--radius)-5px)] group-data-[disabled=true]/input-group:opacity-50"

/**
 * Class-name builder for `InputGroupAddon`. The `align` axis places the addon
 * before/after the control on the inline axis or above/below it on the block
 * axis; it defaults to `'inline-start'`.
 */
const inputGroupAddonVariants = cva(inputGroupAddonBaseClasses, {
  variants: {
    align: {
      'inline-start': 'order-first pl-3 has-[>button]:ml-[-0.45rem] has-[>kbd]:ml-[-0.35rem]',
      'inline-end': 'order-last pr-3 has-[>button]:mr-[-0.45rem] has-[>kbd]:mr-[-0.35rem]',
      'block-start':
        'order-first w-full justify-start px-3 pt-3 [.border-b]:pb-3 group-has-[>input]/input-group:pt-2.5',
      'block-end':
        'order-last w-full justify-start px-3 pb-3 [.border-t]:pt-3 group-has-[>input]/input-group:pb-2.5',
    },
  },
  defaultVariants: {
    align: 'inline-start',
  },
})
/**
 * Decorative or interactive addon placed next to the control of an input
 * group: `<div role="group" data-slot="input-group-addon">`.
 *
 * `align` (default `'inline-start'`) selects the placement classes and is
 * mirrored into `data-align`, which the `InputGroup` classes key off.
 *
 * Clicking the addon moves focus to the group's control, unless the click
 * landed on (or inside) a `<button>`. The control is looked up in the addon's
 * parent element: the first `<input>` is preferred, and when the group has no
 * `<input>` the first `<textarea>` is used, so groups built around a textarea
 * get the same click-to-focus behaviour.
 *
 * Because the remaining props are spread after `onClick`, a caller-supplied
 * `onClick` replaces this focus handler.
 */
function InputGroupAddon({
  className,
  align = 'inline-start',
  ...props
}: React.ComponentProps<'div'> & VariantProps<typeof inputGroupAddonVariants>) {
  return (
    <div
      role="group"
      data-slot="input-group-addon"
      data-align={align}
      className={cn(inputGroupAddonVariants({ align }), className)}
      onClick={(e) => {
        // Let buttons inside the addon handle their own clicks
        if ((e.target as HTMLElement).closest('button')) {
          return
        }
        const group = e.currentTarget.parentElement
        // Prefer an <input>; fall back to a <textarea> control
        const control = group?.querySelector('input') ?? group?.querySelector('textarea')
        control?.focus()
      }}
      {...props}
    />
  )
}
/** Classes shared by every input-group button size. */
const inputGroupButtonBaseClasses = 'text-sm shadow-none flex gap-2 items-center'

/**
 * Class-name builder for `InputGroupButton`. The `size` axis offers two text
 * sizes (`xs`, `sm`) and two square icon sizes; it defaults to `'xs'`.
 */
const inputGroupButtonVariants = cva(inputGroupButtonBaseClasses, {
  variants: {
    size: {
      xs: "h-6 gap-1 px-2 rounded-[calc(var(--radius)-5px)] [&>svg:not([class*='size-'])]:size-3.5 has-[>svg]:px-2",
      sm: 'h-8 px-2.5 gap-1.5 rounded-md has-[>svg]:px-2.5',
      'icon-xs': 'size-6 rounded-[calc(var(--radius)-5px)] p-0 has-[>svg]:p-0',
      'icon-sm': 'size-8 p-0 has-[>svg]:p-0',
    },
  },
  defaultVariants: {
    size: 'xs',
  },
})
/**
 * Compact button for use inside an input group, built on `Button`.
 *
 * Defaults: `type="button"`, `variant="ghost"`, `size="xs"`. The `size` here
 * belongs to `inputGroupButtonVariants`; it is not forwarded as `Button`'s own
 * `size` prop but applied through `className` and exposed as `data-size`.
 */
function InputGroupButton(
  buttonProps: Omit<React.ComponentProps<typeof Button>, 'size'> &
    VariantProps<typeof inputGroupButtonVariants>
) {
  const { className, type = 'button', variant = 'ghost', size = 'xs', ...rest } = buttonProps
  const classes = cn(inputGroupButtonVariants({ size }), className)
  return <Button type={type} data-size={size} variant={variant} className={classes} {...rest} />
}
/**
 * Inline text for an input-group addon: a muted, small `<span>`; nested SVGs
 * ignore pointer events and get a default size unless they set a `size-*`
 * class themselves.
 */
function InputGroupText(textProps: React.ComponentProps<'span'>) {
  const { className, ...rest } = textProps
  const classes = cn(
    "text-muted-foreground flex items-center gap-2 text-sm [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4",
    className
  )
  return <span className={classes} {...rest} />
}
/**
 * Text input for use inside an `InputGroup`, built on `Input`. Its own
 * border, background, shadow and focus ring are removed by the classes, and it
 * is tagged `data-slot="input-group-control"`, the marker the group's focus
 * styling selects on.
 */
function InputGroupInput(inputProps: React.ComponentProps<'input'>) {
  const { className, ...rest } = inputProps
  const classes = cn(
    'flex-1 rounded-none border-0 bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent',
    className
  )
  return <Input data-slot="input-group-control" className={classes} {...rest} />
}
/**
 * Multi-line control for use inside an `InputGroup`, built on `Textarea`.
 * Non-resizable, with its own border, background, shadow and focus ring
 * removed by the classes; tagged `data-slot="input-group-control"`.
 */
function InputGroupTextarea(textareaProps: React.ComponentProps<'textarea'>) {
  const { className, ...rest } = textareaProps
  const classes = cn(
    'flex-1 resize-none rounded-none border-0 bg-transparent py-3 shadow-none focus-visible:ring-0 dark:bg-transparent',
    className
  )
  return <Textarea data-slot="input-group-control" className={classes} {...rest} />
}
export {
InputGroup,
InputGroupAddon,
InputGroupButton,
InputGroupText,
InputGroupInput,
InputGroupTextarea,
}

View File

@@ -0,0 +1,21 @@
import * as React from 'react'
import { cn } from '@/lib/utils'
/**
 * Styled `<input data-slot="input">`.
 *
 * `type` is passed through explicitly; every other prop is spread last onto
 * the element. The class list is split into base look, focus-visible ring and
 * `aria-invalid` styling, followed by the caller's `className`.
 */
function Input(inputProps: React.ComponentProps<'input'>) {
  const { className, type, ...rest } = inputProps
  const classes = cn(
    // Base look (including file-input and disabled states)
    'file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-base shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm',
    // Keyboard focus
    'focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]',
    // Invalid state
    'aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive',
    className
  )
  return <input type={type} data-slot="input" className={classes} {...rest} />
}
export { Input }

View File

@@ -0,0 +1,172 @@
import { Slot } from '@radix-ui/react-slot'
import { type VariantProps, cva } from 'class-variance-authority'
import * as React from 'react'
import { Separator } from '@/components/ui/separator'
import { cn } from '@/lib/utils'
/**
 * Vertical list of items: `<div role="list" data-slot="item-group">` that also
 * declares the `item-group` group name.
 */
function ItemGroup(groupProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = groupProps
  const classes = cn('group/item-group flex flex-col', className)
  return <div role="list" data-slot="item-group" className={classes} {...rest} />
}
/**
 * Divider between items: a `Separator` with `data-slot="item-separator"`,
 * horizontal orientation and no vertical margin. Props are spread last, so a
 * caller-supplied `orientation` would take precedence.
 */
function ItemSeparator(separatorProps: React.ComponentProps<typeof Separator>) {
  const { className, ...rest } = separatorProps
  const classes = cn('my-0', className)
  return (
    <Separator data-slot="item-separator" orientation="horizontal" className={classes} {...rest} />
  )
}
/** Classes shared by every item variant/size combination. */
const itemBaseClasses =
  'group/item flex items-center border border-transparent text-sm rounded-md transition-colors [a]:hover:bg-accent/50 [a]:transition-colors duration-100 flex-wrap outline-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]'

/**
 * Class-name builder for `Item`, with a `variant` axis (background / border
 * treatment) and a `size` axis (padding and gap). Both default to `'default'`.
 */
const itemVariants = cva(itemBaseClasses, {
  variants: {
    variant: {
      default: 'bg-transparent',
      outline: 'border-border',
      muted: 'bg-muted/50',
    },
    size: {
      default: 'p-4 gap-4 ',
      sm: 'py-3 px-4 gap-2.5',
    },
  },
  defaultVariants: {
    variant: 'default',
    size: 'default',
  },
})
/**
 * A single list item.
 *
 * - `variant` / `size` choose the classes from `itemVariants` (both default to
 *   `'default'`) and are mirrored into `data-variant` / `data-size`.
 * - `asChild` (default `false`): when true the Radix `Slot` is rendered instead
 *   of a `<div>`.
 */
function Item(
  itemProps: React.ComponentProps<'div'> & VariantProps<typeof itemVariants> & { asChild?: boolean }
) {
  const {
    className,
    variant = 'default',
    size = 'default',
    asChild = false,
    ...rest
  } = itemProps

  const Root = asChild ? Slot : 'div'
  const classes = cn(itemVariants({ variant, size, className }))

  return (
    <Root
      data-slot="item"
      data-variant={variant}
      data-size={size}
      className={classes}
      {...rest}
    />
  )
}
/** Classes shared by every item-media variant. */
const itemMediaBaseClasses =
  'flex shrink-0 items-center justify-center gap-2 group-has-[[data-slot=item-description]]/item:self-start [&_svg]:pointer-events-none group-has-[[data-slot=item-description]]/item:translate-y-0.5'

/**
 * Class-name builder for `ItemMedia`. `variant` selects a bare wrapper
 * (`default`), a small bordered icon box (`icon`) or a cropped image box
 * (`image`); it defaults to `'default'`.
 */
const itemMediaVariants = cva(itemMediaBaseClasses, {
  variants: {
    variant: {
      default: 'bg-transparent',
      icon: "size-8 border rounded-sm bg-muted [&_svg:not([class*='size-'])]:size-4",
      image: 'size-10 rounded-sm overflow-hidden [&_img]:size-full [&_img]:object-cover',
    },
  },
  defaultVariants: {
    variant: 'default',
  },
})
/**
 * Leading media (icon / image) of an item: `<div data-slot="item-media">`.
 * `variant` (default `'default'`) selects the classes from
 * `itemMediaVariants` and is mirrored into `data-variant`.
 */
function ItemMedia(mediaProps: React.ComponentProps<'div'> & VariantProps<typeof itemMediaVariants>) {
  const { className, variant = 'default', ...rest } = mediaProps
  const classes = cn(itemMediaVariants({ variant, className }))
  return <div data-slot="item-media" data-variant={variant} className={classes} {...rest} />
}
/**
 * Main text column of an item: `<div data-slot="item-content">` that grows to
 * fill free space, except when it directly follows another item-content.
 */
function ItemContent(contentProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = contentProps
  const classes = cn(
    'flex flex-1 flex-col gap-1 [&+[data-slot=item-content]]:flex-none',
    className
  )
  return <div data-slot="item-content" className={classes} {...rest} />
}
/** Item title: a `<div data-slot="item-title">` with small, medium-weight text. */
function ItemTitle(titleProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = titleProps
  const classes = cn('flex w-fit items-center gap-2 text-sm leading-snug font-medium', className)
  return <div data-slot="item-title" className={classes} {...rest} />
}
/**
 * Item description: a muted `<p data-slot="item-description">` clamped to two
 * lines. Direct child links are underlined and take the primary color on hover.
 */
function ItemDescription(descriptionProps: React.ComponentProps<'p'>) {
  const { className, ...rest } = descriptionProps
  const classes = cn(
    'text-muted-foreground line-clamp-2 text-sm leading-normal font-normal text-balance',
    // Link styling
    '[&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4',
    className
  )
  return <p data-slot="item-description" className={classes} {...rest} />
}
/** Trailing actions of an item: a flex-row `<div data-slot="item-actions">`. */
function ItemActions(actionsProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = actionsProps
  const classes = cn('flex items-center gap-2', className)
  return <div data-slot="item-actions" className={classes} {...rest} />
}
/**
 * Full-width header row of an item: `<div data-slot="item-header">` whose
 * children are spread apart with `justify-between`.
 */
function ItemHeader(headerProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = headerProps
  const classes = cn('flex basis-full items-center justify-between gap-2', className)
  return <div data-slot="item-header" className={classes} {...rest} />
}
/**
 * Full-width footer row of an item: `<div data-slot="item-footer">` whose
 * children are spread apart with `justify-between`.
 */
function ItemFooter(footerProps: React.ComponentProps<'div'>) {
  const { className, ...rest } = footerProps
  const classes = cn('flex basis-full items-center justify-between gap-2', className)
  return <div data-slot="item-footer" className={classes} {...rest} />
}
export {
Item,
ItemMedia,
ItemContent,
ItemActions,
ItemGroup,
ItemSeparator,
ItemTitle,
ItemDescription,
ItemHeader,
ItemFooter,
}

View File

@@ -0,0 +1,19 @@
import * as LabelPrimitive from '@radix-ui/react-label'
import * as React from 'react'
import { cn } from '@/lib/utils'
/**
 * Form label built on the Radix `Label` root with `data-slot="label"`.
 * The classes dim it and disable pointer events when an ancestor group has
 * `data-disabled="true"`, and dim it when its `peer` control is disabled.
 */
function Label(labelProps: React.ComponentProps<typeof LabelPrimitive.Root>) {
  const { className, ...rest } = labelProps
  const classes = cn(
    'flex items-center gap-2 text-sm leading-none font-medium select-none group-data-[disabled=true]:pointer-events-none group-data-[disabled=true]:opacity-50 peer-disabled:cursor-not-allowed peer-disabled:opacity-50',
    className
  )
  return <LabelPrimitive.Root data-slot="label" className={classes} {...rest} />
}
export { Label }

View File

@@ -0,0 +1,26 @@
import * as SeparatorPrimitive from '@radix-ui/react-separator'
import * as React from 'react'
import { cn } from '@/lib/utils'
/**
 * Thin divider line built on the Radix `Separator` root.
 *
 * `orientation` defaults to `'horizontal'` and `decorative` to `true`; both
 * are passed to the primitive. The classes size the line to 1px across its
 * orientation, keyed off the `data-orientation` attribute.
 */
function Separator(separatorProps: React.ComponentProps<typeof SeparatorPrimitive.Root>) {
  const { className, orientation = 'horizontal', decorative = true, ...rest } = separatorProps
  const classes = cn(
    'bg-border shrink-0 data-[orientation=horizontal]:h-px data-[orientation=horizontal]:w-full data-[orientation=vertical]:h-full data-[orientation=vertical]:w-px',
    className
  )
  return (
    <SeparatorPrimitive.Root
      data-slot="separator"
      decorative={decorative}
      orientation={orientation}
      className={classes}
      {...rest}
    />
  )
}
export { Separator }

View File

@@ -0,0 +1,38 @@
import {
CircleCheckIcon,
InfoIcon,
Loader2Icon,
OctagonXIcon,
TriangleAlertIcon,
} from 'lucide-react'
import { useTheme } from 'next-themes'
import { Toaster as Sonner, type ToasterProps } from 'sonner'
/**
 * Toast container: wraps sonner's `Toaster` with the theme from `next-themes`
 * (falling back to `'system'`), lucide icons for each toast type, and CSS
 * variables that point sonner's colors / radius at this package's tokens.
 * Caller props are spread last and may override any of these.
 */
const Toaster = (toasterProps: ToasterProps) => {
  const { theme = 'system' } = useTheme()

  // One icon per toast type
  const toastIcons = {
    success: <CircleCheckIcon className="size-4" />,
    info: <InfoIcon className="size-4" />,
    warning: <TriangleAlertIcon className="size-4" />,
    error: <OctagonXIcon className="size-4" />,
    loading: <Loader2Icon className="size-4 animate-spin" />,
  }

  // Custom properties are not part of CSSProperties, hence the assertion
  const tokenStyle = {
    '--normal-bg': 'var(--popover)',
    '--normal-text': 'var(--popover-foreground)',
    '--normal-border': 'var(--border)',
    '--border-radius': 'var(--radius)',
  } as React.CSSProperties

  return (
    <Sonner
      theme={theme as ToasterProps['theme']}
      className="toaster group"
      icons={toastIcons}
      style={tokenStyle}
      {...toasterProps}
    />
  )
}
export { Toaster }

View File

@@ -0,0 +1,16 @@
import { Loader2Icon } from 'lucide-react'
import { cn } from '@/lib/utils'
/**
 * Spinning loader icon.
 *
 * Renders `Loader2Icon` with `role="status"` and `aria-label="Loading"`, merges
 * the default size/animation classes with the caller's `className`, and
 * forwards the remaining SVG props.
 */
function Spinner(props: React.ComponentProps<'svg'>) {
	const { className: extraClasses, ...svgProps } = props
	const classes = cn('size-4 animate-spin', extraClasses)

	return <Loader2Icon role="status" aria-label="Loading" className={classes} {...svgProps} />
}
export { Spinner }

View File

@@ -0,0 +1,26 @@
import * as SwitchPrimitive from '@radix-ui/react-switch'
import * as React from 'react'
import { cn } from '@/lib/utils'
/**
 * Styled wrapper around the Radix `Switch` primitive.
 *
 * The root merges the default track classes with the caller's `className` and
 * forwards every other prop. The thumb is always rendered with its own fixed
 * classes.
 */
function Switch(props: React.ComponentProps<typeof SwitchPrimitive.Root>) {
	const { className: extraClasses, ...rootProps } = props
	const trackClasses =
		'peer data-[state=checked]:bg-primary data-[state=unchecked]:bg-input focus-visible:border-ring focus-visible:ring-ring/50 dark:data-[state=unchecked]:bg-input/80 inline-flex h-[1.15rem] w-8 shrink-0 items-center rounded-full border border-transparent shadow-xs transition-all outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50'
	const thumbClasses =
		'bg-background dark:data-[state=unchecked]:bg-foreground dark:data-[state=checked]:bg-primary-foreground pointer-events-none block size-4 rounded-full ring-0 transition-transform data-[state=checked]:translate-x-[calc(100%-2px)] data-[state=unchecked]:translate-x-0'

	return (
		<SwitchPrimitive.Root
			data-slot="switch"
			className={cn(trackClasses, extraClasses)}
			{...rootProps}
		>
			<SwitchPrimitive.Thumb data-slot="switch-thumb" className={cn(thumbClasses)} />
		</SwitchPrimitive.Root>
	)
}
export { Switch }

View File

@@ -0,0 +1,18 @@
import * as React from 'react'
import { cn } from '@/lib/utils'
/**
 * Styled native `<textarea>`.
 *
 * Tags the element with `data-slot="textarea"`, merges the default classes with
 * the caller's `className` via `cn`, and forwards all other props.
 */
function Textarea(props: React.ComponentProps<'textarea'>) {
	const { className: extraClasses, ...textareaProps } = props
	const baseClasses =
		'border-input placeholder:text-muted-foreground focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 flex field-sizing-content min-h-16 w-full rounded-md border bg-transparent px-3 py-2 text-base shadow-xs transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50 md:text-sm'

	return (
		<textarea
			data-slot="textarea"
			className={cn(baseClasses, extraClasses)}
			{...textareaProps}
		/>
	)
}
export { Textarea }

View File

@@ -0,0 +1,178 @@
/**
* Background Script Entry Point
*
* This script runs as the extension's service worker and hosts:
* - PageAgentCore (headless agent)
* - RemotePageController (proxy to ContentScript)
* - Command handlers for SidePanel
* - Event broadcasting to SidePanel
*/
import { PageAgentCore } from '@page-agent/core'
import { RemotePageController } from '../agent/RemotePageController'
import { eventBroadcaster } from '../messaging/events'
import {
type AgentActivity,
type AgentState,
type AgentStatus,
type HistoricalEvent,
agentCommands,
} from '../messaging/protocol'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants'
// Agent instance (singleton for now - single page control).
// `null` until the first task is executed, and reset to `null` when the agent is disposed.
let agent: PageAgentCore | null = null
// LLM configuration (persisted in chrome.storage.local under the `llmConfig` key)
interface LLMConfig {
apiKey: string
baseURL: string
model: string
}
// Active configuration. Starts as the demo config; replaced by loadConfig()
// when a stored value exists and by saveConfig() on 'agent:configure'.
let llmConfig: LLMConfig = {
apiKey: DEMO_API_KEY,
baseURL: DEMO_BASE_URL,
model: DEMO_MODEL,
}
/**
 * Background entrypoint: loads the stored LLM config, registers the SidePanel
 * command handlers, and makes the toolbar action open the side panel.
 */
export default defineBackground(() => {
	console.log('[PageAgentExt] Background script started')

	// Load saved config from storage. The call is intentionally not awaited
	// (handlers are registered synchronously below), so a storage failure must
	// be caught here instead of surfacing as an unhandled rejection.
	loadConfig().catch((error: unknown) =>
		console.error('[PageAgentExt] Failed to load LLM config:', error)
	)

	// Register command handlers
	registerCommandHandlers()

	// Open sidepanel on action click
	chrome.sidePanel
		.setPanelBehavior({ openPanelOnActionClick: true })
		.catch((error: unknown) =>
			console.error('[PageAgentExt] Failed to set panel behavior:', error)
		)
})
/**
 * Load the LLM configuration from `chrome.storage.local` into the module-level
 * `llmConfig`.
 *
 * Storage contents are untyped, so the stored value is validated instead of
 * being cast: it must be an object, and each field is taken only when it is a
 * string. Missing or malformed fields keep the value currently held in
 * `llmConfig` (the demo defaults at startup). When nothing usable is stored the
 * current config is left untouched.
 *
 * @throws Rejects if the storage read fails.
 */
async function loadConfig(): Promise<void> {
	const result = await chrome.storage.local.get('llmConfig')
	const stored: unknown = result.llmConfig

	if (typeof stored !== 'object' || stored === null) {
		console.log('[PageAgentExt] Using default demo config')
		return
	}

	const { apiKey, baseURL, model } = stored as Partial<Record<keyof LLMConfig, unknown>>
	llmConfig = {
		apiKey: typeof apiKey === 'string' ? apiKey : llmConfig.apiKey,
		baseURL: typeof baseURL === 'string' ? baseURL : llmConfig.baseURL,
		model: typeof model === 'string' ? model : llmConfig.model,
	}
	console.log('[PageAgentExt] Loaded LLM config from storage')
}
/**
 * Make `config` the active LLM configuration and persist it.
 *
 * The in-memory `llmConfig` is switched first; the value is then written to
 * `chrome.storage.local` under the `llmConfig` key.
 */
async function saveConfig(config: LLMConfig): Promise<void> {
	const record = { llmConfig: config }

	llmConfig = record.llmConfig
	await chrome.storage.local.set(record)

	console.log('[PageAgentExt] Saved LLM config')
}
/**
 * Build a snapshot of the current agent state.
 *
 * Without an agent instance the snapshot is an idle state with an empty task
 * and history; otherwise it mirrors the agent's `status`, `task` and `history`.
 */
function getAgentState(): AgentState {
	const current = agent

	return current
		? {
				status: current.status as AgentStatus,
				task: current.task,
				history: current.history as HistoricalEvent[],
			}
		: { status: 'idle', task: '', history: [] }
}
/**
 * Build a `PageAgentCore` wired to a `RemotePageController` and the current
 * `llmConfig`, and forward its events to the SidePanel through
 * `eventBroadcaster`.
 *
 * On `dispose` the module-level `agent` reference is cleared (only if it still
 * points at this instance) and an `'idle'` status is broadcast.
 */
function createAgent(): PageAgentCore {
	const remoteController = new RemotePageController()
	const instance = new PageAgentCore({
		...llmConfig,
		pageController: remoteController as any, // Type assertion for interface compatibility
		language: 'en-US',
	})

	// Forward agent events to SidePanel
	const forwardStatus = () => {
		eventBroadcaster.status(instance.status as AgentStatus)
	}
	const forwardHistory = () => {
		eventBroadcaster.history(instance.history as HistoricalEvent[])
	}
	const forwardActivity = (e: Event) => {
		eventBroadcaster.activity((e as CustomEvent).detail as AgentActivity)
	}
	const handleDispose = () => {
		if (agent === instance) {
			agent = null
		}
		eventBroadcaster.status('idle')
	}

	instance.addEventListener('statuschange', forwardStatus)
	instance.addEventListener('historychange', forwardHistory)
	instance.addEventListener('activity', forwardActivity)
	instance.addEventListener('dispose', handleDispose)

	return instance
}
/**
 * Register the handlers for the commands the SidePanel sends over
 * `agentCommands`:
 *
 * - `agent:execute`   — start a task, creating an agent when none is usable.
 * - `agent:stop`      — dispose the current agent, if any.
 * - `agent:getState`  — reply with the current state snapshot.
 * - `agent:configure` — persist a new LLM config and dispose a live agent.
 */
function registerCommandHandlers(): void {
	// Execute task
	agentCommands.onMessage('agent:execute', async ({ data: task }) => {
		console.log('[PageAgentExt] Executing task:', task)

		// Reuse the live agent, or create a new one if it is missing or disposed
		const activeAgent = agent && !agent.disposed ? agent : (agent = createAgent())

		// Execute task (don't await - runs in background)
		activeAgent.execute(task).catch((error) => {
			console.error('[PageAgentExt] Task execution error:', error)
			eventBroadcaster.status('error')
		})
	})

	// Stop agent
	agentCommands.onMessage('agent:stop', async () => {
		console.log('[PageAgentExt] Stopping agent')
		agent?.dispose('User requested stop')
		agent = null
	})

	// Get current state
	agentCommands.onMessage('agent:getState', async () => getAgentState())

	// Configure LLM
	agentCommands.onMessage('agent:configure', async ({ data: config }) => {
		await saveConfig(config)

		// Drop a live agent so that the next task is created with the new config
		const liveAgent = agent
		if (liveAgent === null || liveAgent.disposed) return
		liveAgent.dispose('Configuration changed')
		agent = null
	})

	console.log('[PageAgentExt] Command handlers registered')
}

View File

@@ -0,0 +1,99 @@
/**
* Content Script Entry Point
*
* This script runs in the context of web pages and hosts the real PageController.
* It listens for RPC messages from Background and dispatches them to PageController.
*/
import { PageController } from '@page-agent/page-controller'
import { pageControllerRPC } from '../messaging/protocol'
/**
 * Content script entrypoint, injected into every URL at `document_idle`.
 * Creates the page's `PageController` (mask enabled), exposes it over RPC, and
 * disposes it when the page fires `beforeunload`.
 */
export default defineContentScript({
	matches: ['<all_urls>'],
	runAt: 'document_idle',
	main() {
		console.log('[PageAgentExt] Content script loaded')

		// PageController instance with mask enabled
		const pageController = new PageController({ enableMask: true })

		registerRPCHandlers(pageController)

		// Cleanup on page unload
		const releaseController = () => {
			pageController.dispose()
		}
		window.addEventListener('beforeunload', releaseController)
	},
})
/**
 * Bind every `rpc:*` message of `pageControllerRPC` to the matching method of
 * `controller`.
 *
 * Query and action handlers reply with the method's result. The
 * `cleanUpHighlights`, `showMask` and `hideMask` handlers await the call and
 * reply with no value; `dispose` is invoked without awaiting.
 */
function registerRPCHandlers(controller: PageController): void {
	const rpc = pageControllerRPC

	// State queries
	rpc.onMessage('rpc:getCurrentUrl', async () => controller.getCurrentUrl())
	rpc.onMessage('rpc:getLastUpdateTime', async () => controller.getLastUpdateTime())
	rpc.onMessage('rpc:getBrowserState', async () => controller.getBrowserState())

	// DOM operations
	rpc.onMessage('rpc:updateTree', async () => controller.updateTree())
	rpc.onMessage('rpc:cleanUpHighlights', async () => {
		await controller.cleanUpHighlights()
	})

	// Element actions
	rpc.onMessage('rpc:clickElement', async ({ data: elementIndex }) =>
		controller.clickElement(elementIndex)
	)
	rpc.onMessage('rpc:inputText', async ({ data: { index, text } }) =>
		controller.inputText(index, text)
	)
	rpc.onMessage('rpc:selectOption', async ({ data: { index, optionText } }) =>
		controller.selectOption(index, optionText)
	)
	rpc.onMessage('rpc:scroll', async ({ data: scrollOptions }) =>
		controller.scroll(scrollOptions)
	)
	rpc.onMessage('rpc:scrollHorizontally', async ({ data: scrollOptions }) =>
		controller.scrollHorizontally(scrollOptions)
	)
	rpc.onMessage('rpc:executeJavascript', async ({ data: source }) =>
		controller.executeJavascript(source)
	)

	// Mask operations
	rpc.onMessage('rpc:showMask', async () => {
		await controller.showMask()
	})
	rpc.onMessage('rpc:hideMask', async () => {
		await controller.hideMask()
	})

	// Lifecycle
	rpc.onMessage('rpc:dispose', async () => {
		controller.dispose()
	})

	console.log('[PageAgentExt] RPC handlers registered')
}

View File

@@ -0,0 +1,490 @@
import {
ArrowRight,
Bot,
CheckCircle,
Loader2,
MessageSquare,
Send,
Settings,
Sparkles,
Square,
XCircle,
} from 'lucide-react'
import { Fragment, useCallback, useEffect, useRef, useState } from 'react'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import {
InputGroup,
InputGroupAddon,
InputGroupButton,
InputGroupTextarea,
} from '@/components/ui/input-group'
import { cn } from '@/lib/utils'
import { subscribeToEvents } from '@/messaging/events'
import { agentCommands } from '@/messaging/protocol'
import type { AgentActivity, AgentState, AgentStatus, HistoricalEvent } from '@/messaging/protocol'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
/**
 * Configuration panel component.
 *
 * Edits the LLM settings (API key, base URL, model). The fields start with the
 * demo values and are replaced by whatever is stored under `llmConfig` in
 * `chrome.storage.local`; empty or missing stored fields fall back to the demo
 * values. Saving sends `agent:configure` to the background and closes the panel
 * on success; on failure the panel stays open and the error is logged.
 *
 * @param onClose Called when the user cancels or after a successful save.
 */
function ConfigPanel({ onClose }: { onClose: () => void }) {
	const [apiKey, setApiKey] = useState(DEMO_API_KEY)
	const [baseURL, setBaseURL] = useState(DEMO_BASE_URL)
	const [model, setModel] = useState(DEMO_MODEL)
	const [saving, setSaving] = useState(false)

	useEffect(() => {
		chrome.storage.local
			.get('llmConfig')
			.then((result) => {
				const config = result.llmConfig as
					| { apiKey?: string; baseURL?: string; model?: string }
					| undefined
				if (config) {
					setApiKey(config.apiKey || DEMO_API_KEY)
					setBaseURL(config.baseURL || DEMO_BASE_URL)
					setModel(config.model || DEMO_MODEL)
				}
			})
			.catch((error: unknown) => {
				// Keep the demo defaults already in state if storage is unreadable
				console.error('[PageAgentExt] Failed to read LLM config:', error)
			})
	}, [])

	const handleSave = async () => {
		setSaving(true)
		try {
			await agentCommands.sendMessage('agent:configure', { apiKey, baseURL, model })
			onClose()
		} catch (error: unknown) {
			// Without this catch the rejection escapes the click handler unhandled
			console.error('[PageAgentExt] Failed to save LLM config:', error)
		} finally {
			setSaving(false)
		}
	}

	return (
		<div className="flex flex-col gap-4 p-4">
			<h2 className="text-base font-semibold">Settings</h2>
			<div className="flex flex-col gap-1.5">
				<label htmlFor="llm-config-api-key" className="text-xs text-muted-foreground">
					API Key
				</label>
				<Input
					id="llm-config-api-key"
					type="text"
					placeholder="sk-..."
					value={apiKey}
					onChange={(e) => setApiKey(e.target.value)}
					className="text-xs h-8"
				/>
			</div>
			<div className="flex flex-col gap-1.5">
				<label htmlFor="llm-config-base-url" className="text-xs text-muted-foreground">
					Base URL
				</label>
				<Input
					id="llm-config-base-url"
					placeholder="https://api.openai.com/v1"
					value={baseURL}
					onChange={(e) => setBaseURL(e.target.value)}
					className="text-xs h-8"
				/>
			</div>
			<div className="flex flex-col gap-1.5">
				<label htmlFor="llm-config-model" className="text-xs text-muted-foreground">
					Model
				</label>
				<Input
					id="llm-config-model"
					placeholder="gpt-4o"
					value={model}
					onChange={(e) => setModel(e.target.value)}
					className="text-xs h-8"
				/>
			</div>
			<div className="flex gap-2 mt-2">
				<Button variant="outline" onClick={onClose} className="flex-1 h-8 text-xs">
					Cancel
				</Button>
				<Button onClick={handleSave} disabled={saving} className="flex-1 h-8 text-xs">
					{saving ? <Loader2 className="size-3 animate-spin" /> : 'Save'}
				</Button>
			</div>
		</div>
	)
}
/**
 * Result card for the `done` action.
 *
 * Shows a green "Success" or destructive "Failed" banner depending on
 * `success`, followed by the result text with line breaks preserved.
 */
function ResultCard({ success, text }: { success: boolean; text: string }) {
	const StatusIcon = success ? CheckCircle : XCircle
	const tone = success
		? {
				frame: 'border-green-500/30 bg-green-500/10',
				icon: 'size-3.5 text-green-500',
				label: 'text-green-600 dark:text-green-400',
				verdict: 'Success',
			}
		: {
				frame: 'border-destructive/30 bg-destructive/10',
				icon: 'size-3.5 text-destructive',
				label: 'text-destructive',
				verdict: 'Failed',
			}

	return (
		<div className={cn('rounded-lg border p-3', tone.frame)}>
			<div className="flex items-center gap-2 mb-1.5">
				<StatusIcon className={tone.icon} />
				<span className={cn('text-xs font-medium', tone.label)}>Result: {tone.verdict}</span>
			</div>
			<p className="text-xs text-muted-foreground pl-5 whitespace-pre-wrap">{text}</p>
		</div>
	)
}
/**
 * Reflection section of a step card.
 *
 * Lists the non-empty reflection fields (previous-goal evaluation, memory, next
 * goal), each prefixed with its emoji. Renders nothing when all fields are
 * empty.
 */
function ReflectionSection({
	reflection,
}: {
	reflection: {
		evaluation_previous_goal?: string
		memory?: string
		next_goal?: string
	}
}) {
	const visibleRows = [
		{ key: 'eval', emoji: '✅', text: reflection.evaluation_previous_goal },
		{ key: 'memory', emoji: '💾', text: reflection.memory },
		{ key: 'goal', emoji: '🎯', text: reflection.next_goal },
	].filter(({ text }) => Boolean(text))

	if (!visibleRows.length) {
		return null
	}

	return (
		<div className="mb-2">
			<div className="text-[10px] font-medium text-muted-foreground uppercase tracking-wide mb-1">
				Reflection
			</div>
			<div className="grid grid-cols-[auto_1fr] gap-x-2 gap-y-0.5">
				{visibleRows.map(({ key, emoji, text }) => (
					<Fragment key={key}>
						<span className="text-xs">{emoji}</span>
						<span className="text-xs text-muted-foreground">{text}</span>
					</Fragment>
				))}
			</div>
		</div>
	)
}
/**
 * History event card.
 *
 * - `step` whose action is `done` → a `ResultCard` (success defaults to true;
 *   text falls back to the action output, then to an empty string).
 * - other `step` events → optional reflection plus the action name, its
 *   JSON-serialized input and its output.
 * - `observation` → the observation content.
 * - `error` → the error message.
 * - any other type → nothing.
 */
function EventCard({ event }: { event: HistoricalEvent }) {
	switch (event.type) {
		case 'step': {
			const { action, reflection } = event

			// Done action - show as result card
			if (action?.name === 'done') {
				const doneInput = action.input as { text?: string; success?: boolean }
				return (
					<ResultCard
						success={doneInput?.success ?? true}
						text={doneInput?.text || action.output || ''}
					/>
				)
			}

			return (
				<div className="rounded-lg border bg-card p-2.5">
					{/* Reflection */}
					{reflection && <ReflectionSection reflection={reflection} />}
					{/* Action */}
					{action && (
						<div>
							<div className="text-[10px] font-medium text-muted-foreground uppercase tracking-wide mb-1">
								{action.name}
							</div>
							<div className="flex items-start gap-1.5">
								<ArrowRight className="size-3 text-blue-500 shrink-0 mt-0.5" />
								<div className="flex-1 min-w-0">
									<p className="text-xs text-muted-foreground mb-0.5">
										{JSON.stringify(action.input)}
									</p>
									<p className="text-[11px] text-muted-foreground/70"> {action.output}</p>
								</div>
							</div>
						</div>
					)}
				</div>
			)
		}

		case 'observation':
			return (
				<div className="flex items-start gap-1.5 rounded-lg border bg-card p-2.5">
					<MessageSquare className="size-3 text-green-500 shrink-0 mt-0.5" />
					<span className="text-xs text-muted-foreground">{event.content}</span>
				</div>
			)

		case 'error':
			return (
				<div className="flex items-start gap-1.5 rounded-lg border border-destructive/30 bg-destructive/10 p-2.5">
					<XCircle className="size-3 text-destructive shrink-0 mt-0.5" />
					<span className="text-xs text-destructive">{event.message}</span>
				</div>
			)

		default:
			return null
	}
}
/**
 * Activity card with animation.
 *
 * Shows a pulsing one-line indicator for the agent's current activity. Text,
 * text colour and ping-dot colour all come from a single lookup on
 * `activity.type`.
 *
 * Activities reach the UI over the extension message channel and are only
 * type-asserted there, so an unrecognised `type` is possible at runtime. Such
 * activities render nothing instead of crashing the panel.
 */
function ActivityCard({ activity }: { activity: AgentActivity }) {
	const getActivityInfo = (): { text: string; color: string; dot: string } | null => {
		switch (activity.type) {
			case 'thinking':
				return { text: 'Thinking...', color: 'text-blue-500', dot: 'bg-blue-500' }
			case 'executing':
				return {
					text: `Executing ${activity.tool}...`,
					color: 'text-amber-500',
					dot: 'bg-amber-500',
				}
			case 'executed':
				return { text: `Done: ${activity.tool}`, color: 'text-green-500', dot: 'bg-green-500' }
			case 'retrying':
				return {
					text: `Retrying (${activity.attempt}/${activity.maxAttempts})...`,
					color: 'text-amber-500',
					dot: 'bg-amber-500',
				}
			case 'error':
				return { text: activity.message, color: 'text-destructive', dot: 'bg-destructive' }
			default:
				// Unknown activity type received at runtime
				return null
		}
	}

	const info = getActivityInfo()
	if (!info) return null

	return (
		<div className="flex items-center gap-2 rounded-lg border bg-card/50 p-2.5 animate-pulse">
			<div className="relative">
				<Sparkles className={cn('size-3.5', info.color)} />
				<span
					className={cn(
						'absolute -top-0.5 -right-0.5 size-1.5 rounded-full animate-ping',
						info.dot
					)}
				/>
			</div>
			<span className={cn('text-xs', info.color)}>{info.text}</span>
		</div>
	)
}
/**
 * Status dot indicator: a coloured dot plus a short label for the agent
 * status. The dot pulses while the status is `running`.
 */
function StatusDot({ status }: { status: AgentStatus }) {
	const dotColors: Record<AgentStatus, string> = {
		idle: 'bg-muted-foreground',
		running: 'bg-blue-500',
		completed: 'bg-green-500',
		error: 'bg-destructive',
	}
	const labels: Record<AgentStatus, string> = {
		idle: 'Ready',
		running: 'Running',
		completed: 'Done',
		error: 'Error',
	}
	const isPulsing = status === 'running'
	const dotClasses = cn('size-2 rounded-full', dotColors[status], isPulsing && 'animate-pulse')

	return (
		<div className="flex items-center gap-1.5">
			<span className={dotClasses} />
			<span className="text-xs text-muted-foreground">{labels[status]}</span>
		</div>
	)
}
/**
 * Logo component (Bot icon as placeholder until real logo is added).
 * The caller's `className` is merged after the default `text-primary` colour.
 */
function Logo({ className }: { className?: string }) {
	const classes = cn('text-primary', className)
	return <Bot className={classes} />
}
/**
 * Empty state with logo: a centred logo, the product name and a one-line hint.
 */
function EmptyState() {
	const heading = <h2 className="text-sm font-medium text-foreground">Page Agent Ext</h2>
	const hint = (
		<p className="text-xs text-muted-foreground mt-1">Enter a task to automate this page</p>
	)

	return (
		<div className="flex flex-col items-center justify-center h-full gap-3 text-center px-6">
			<Logo className="size-20 opacity-80" />
			<div>
				{heading}
				{hint}
			</div>
		</div>
	)
}
/**
 * Root component of the agent UI.
 *
 * Keeps a local mirror of the agent state (status, history, transient activity
 * and current task) that is fed by background events and by an initial
 * `agent:getState` snapshot, and lets the user start or stop a task and open
 * the settings panel.
 *
 * Every asynchronous call (storage seeding, state snapshot, execute, stop)
 * handles its own rejection, so a messaging or storage failure is logged
 * instead of surfacing as an unhandled promise rejection.
 */
export default function App() {
	const [showConfig, setShowConfig] = useState(false)
	const [task, setTask] = useState('')
	const [status, setStatus] = useState<AgentStatus>('idle')
	const [history, setHistory] = useState<HistoricalEvent[]>([])
	const [activity, setActivity] = useState<AgentActivity | null>(null)
	const [currentTask, setCurrentTask] = useState('')
	const historyRef = useRef<HTMLDivElement>(null)
	const textareaRef = useRef<HTMLTextAreaElement>(null)

	// Subscribe to agent events
	useEffect(() => {
		// Cleared on cleanup so a late snapshot is not applied after unsubscribing
		let active = true

		// Initialize with demo config if not set
		const seedDemoConfig = async (): Promise<void> => {
			const result = await chrome.storage.local.get('llmConfig')
			if (!result.llmConfig) {
				await chrome.storage.local.set({
					llmConfig: { apiKey: DEMO_API_KEY, baseURL: DEMO_BASE_URL, model: DEMO_MODEL },
				})
			}
		}
		seedDemoConfig().catch((error: unknown) => {
			console.error('[PageAgentExt] Failed to initialize LLM config:', error)
		})

		const unsubscribe = subscribeToEvents({
			onStatus: (newStatus) => {
				setStatus(newStatus)
				// Activity is transient: clear it once the agent is no longer running
				if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
					setActivity(null)
				}
			},
			onHistory: (newHistory) => {
				setHistory(newHistory)
			},
			onActivity: (newActivity) => {
				setActivity(newActivity)
			},
			onStateSnapshot: (state) => {
				setStatus(state.status)
				setHistory(state.history)
				setCurrentTask(state.task)
			},
		})

		// Get initial state
		agentCommands
			.sendMessage('agent:getState', undefined)
			.then((state: AgentState) => {
				if (!active) return
				setStatus(state.status)
				setHistory(state.history)
				setCurrentTask(state.task)
			})
			.catch((error: unknown) => {
				console.error('[PageAgentExt] Failed to get agent state:', error)
			})

		return () => {
			active = false
			unsubscribe()
		}
	}, [])

	// Auto-scroll to bottom on new events
	useEffect(() => {
		if (historyRef.current) {
			historyRef.current.scrollTop = historyRef.current.scrollHeight
		}
	}, [history, activity])

	const handleSubmit = useCallback(
		async (e?: React.FormEvent) => {
			e?.preventDefault()
			if (!task.trim() || status === 'running') return

			setCurrentTask(task)
			setHistory([])
			try {
				await agentCommands.sendMessage('agent:execute', task)
				setTask('')
			} catch (error: unknown) {
				// Keep the typed task in the input so the user can retry
				console.error('[PageAgentExt] Failed to start task:', error)
			}
		},
		[task, status]
	)

	const handleStop = useCallback(async () => {
		try {
			await agentCommands.sendMessage('agent:stop', undefined)
		} catch (error: unknown) {
			console.error('[PageAgentExt] Failed to stop agent:', error)
		}
	}, [])

	const handleKeyDown = (e: React.KeyboardEvent) => {
		// Enter submits; Shift+Enter inserts a newline
		if (e.key === 'Enter' && !e.shiftKey) {
			e.preventDefault()
			void handleSubmit()
		}
	}

	if (showConfig) {
		return <ConfigPanel onClose={() => setShowConfig(false)} />
	}

	const isRunning = status === 'running'
	const showEmptyState = !currentTask && history.length === 0 && !isRunning

	return (
		<div className="flex flex-col h-screen bg-background">
			{/* Header */}
			<div className="flex items-center justify-between border-b px-3 py-2">
				<div className="flex items-center gap-2">
					<Logo className="size-5" />
					<span className="text-sm font-medium">Page Agent Ext</span>
				</div>
				<div className="flex items-center gap-3">
					<StatusDot status={status} />
					<Button variant="ghost" size="icon-sm" onClick={() => setShowConfig(true)}>
						<Settings className="size-3.5" />
					</Button>
				</div>
			</div>
			{/* Content */}
			<div className="flex-1 overflow-hidden flex flex-col">
				{/* Current task */}
				{currentTask && (
					<div className="border-b px-3 py-2 bg-muted/30">
						<div className="text-[10px] text-muted-foreground uppercase tracking-wide">Task</div>
						<div className="text-xs font-medium truncate" title={currentTask}>
							{currentTask}
						</div>
					</div>
				)}
				{/* History */}
				<div ref={historyRef} className="flex-1 overflow-y-auto p-3 space-y-2">
					{showEmptyState && <EmptyState />}
					{history.map((event, index) => (
						<EventCard key={index} event={event} />
					))}
					{/* Activity indicator at bottom */}
					{activity && <ActivityCard activity={activity} />}
				</div>
			</div>
			{/* Input */}
			<div className="border-t p-3">
				<InputGroup className="relative">
					<InputGroupTextarea
						ref={textareaRef}
						placeholder="Describe your task... (Enter to send)"
						value={task}
						onChange={(e) => setTask(e.target.value)}
						onKeyDown={handleKeyDown}
						disabled={isRunning}
						rows={2}
						className="text-xs pr-12 min-h-[60px]"
					/>
					<InputGroupAddon align="inline-end" className="absolute bottom-2 right-2">
						{isRunning ? (
							<InputGroupButton
								size="icon-sm"
								variant="destructive"
								onClick={handleStop}
								className="size-7"
							>
								<Square className="size-3" />
							</InputGroupButton>
						) : (
							<InputGroupButton
								size="icon-sm"
								variant="default"
								onClick={() => void handleSubmit()}
								disabled={!task.trim()}
								className="size-7"
							>
								<Send className="size-3" />
							</InputGroupButton>
						)}
					</InputGroupAddon>
				</InputGroup>
			</div>
		</div>
	)
}

View File

@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Page Agent</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="./main.tsx"></script>
</body>
</html>

View File

@@ -0,0 +1,12 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App'
import '@/assets/index.css'
// Mount <App /> into the #root element declared in index.html.
const container = document.getElementById('root')!
const root = ReactDOM.createRoot(container)

root.render(
	<React.StrictMode>
		<App />
	</React.StrictMode>
)

View File

@@ -0,0 +1,6 @@
import { type ClassValue, clsx } from 'clsx'
import { twMerge } from 'tailwind-merge'
/**
 * Combine class-name inputs into a single string: `clsx` resolves the
 * inputs, then `twMerge` post-processes the resulting class list.
 */
export function cn(...inputs: ClassValue[]) {
	const joined = clsx(inputs)
	return twMerge(joined)
}

View File

@@ -0,0 +1,98 @@
/**
* Agent Event Broadcasting
*
* This module handles broadcasting agent events from Background to SidePanel.
* Uses chrome.runtime API for broadcasting to all extension contexts.
*/
import type { AgentActivity, AgentState, AgentStatus, HistoricalEvent } from './protocol'
// Event type constants: the `type` tag carried by every broadcast message.
const EVENT_TYPES = {
STATUS: 'event:status',
HISTORY: 'event:history',
ACTIVITY: 'event:activity',
STATE_SNAPSHOT: 'event:stateSnapshot',
} as const
// Union of the string values above ('event:status' | 'event:history' | ...).
type EventType = (typeof EVENT_TYPES)[keyof typeof EVENT_TYPES]
// Wire format of a broadcast: the event tag plus its payload.
interface EventMessage<T = unknown> {
type: EventType
payload: T
}
/**
 * Send an event to the other extension contexts (sidepanel, popup, etc.) via
 * `chrome.runtime.sendMessage`.
 *
 * Fire-and-forget: a rejection (e.g. nobody is listening) is deliberately
 * ignored.
 */
function broadcast<T>(type: EventType, payload: T): void {
	const envelope: EventMessage<T> = { type, payload }
	const ignoreMissingReceiver = () => {
		// Ignore errors when no listeners are active
	}

	chrome.runtime.sendMessage(envelope).catch(ignoreMissingReceiver)
}
/**
 * Broadcast helpers for agent state updates, one per event type.
 * Called from Background to notify SidePanel of changes.
 */
export const eventBroadcaster = {
	/** Broadcast status change */
	status: (status: AgentStatus): void => broadcast(EVENT_TYPES.STATUS, status),

	/** Broadcast history update */
	history: (history: HistoricalEvent[]): void => broadcast(EVENT_TYPES.HISTORY, history),

	/** Broadcast activity (transient) */
	activity: (activity: AgentActivity): void => broadcast(EVENT_TYPES.ACTIVITY, activity),

	/** Broadcast full state snapshot */
	stateSnapshot: (state: AgentState): void => broadcast(EVENT_TYPES.STATE_SNAPSHOT, state),
}
/**
 * Callbacks accepted by `subscribeToEvents`, one per event type.
 * All are optional; an event without a matching callback is dropped.
 *
 * NOTE(review): this exported name is the same as the DOM lib's global
 * `EventListener` type and shadows it in modules that import it.
 */
export interface EventListener {
onStatus?: (status: AgentStatus) => void
onHistory?: (history: HistoricalEvent[]) => void
onActivity?: (activity: AgentActivity) => void
onStateSnapshot?: (state: AgentState) => void
}
/**
 * Subscribe to agent events in SidePanel.
 *
 * Registers a `chrome.runtime.onMessage` listener and routes each recognised
 * event to the matching callback of `listener`. The runtime channel is shared
 * with all other extension messaging, so anything that is not an object with a
 * `type` field, or whose `type` is not one of `EVENT_TYPES`, is ignored rather
 * than dereferenced.
 *
 * @param listener Callbacks to invoke, one per event type (all optional).
 * @returns An unsubscribe function that removes the runtime listener.
 */
export function subscribeToEvents(listener: EventListener): () => void {
	const handler = (message: unknown): void => {
		// Foreign traffic may be null or a primitive; never assume the envelope shape
		if (typeof message !== 'object' || message === null || !('type' in message)) {
			return
		}

		const { type, payload } = message as EventMessage
		switch (type) {
			case EVENT_TYPES.STATUS:
				listener.onStatus?.(payload as AgentStatus)
				break
			case EVENT_TYPES.HISTORY:
				listener.onHistory?.(payload as HistoricalEvent[])
				break
			case EVENT_TYPES.ACTIVITY:
				listener.onActivity?.(payload as AgentActivity)
				break
			case EVENT_TYPES.STATE_SNAPSHOT:
				listener.onStateSnapshot?.(payload as AgentState)
				break
		}
	}

	chrome.runtime.onMessage.addListener(handler)

	return () => {
		chrome.runtime.onMessage.removeListener(handler)
	}
}

View File

@@ -0,0 +1,6 @@
/**
* Messaging module exports
*/
export * from './protocol'
export * from './rpc'
export * from './events'

View File

@@ -0,0 +1,164 @@
/**
* Message Protocol for PageAgentExt
*
* This file defines all message types for cross-context communication:
* - RPC: Background <-> ContentScript (PageController remote calls)
* - Commands: SidePanel -> Background (user actions)
* - Events: Background -> SidePanel (agent state updates)
*/
import { defineExtensionMessaging } from '@webext-core/messaging'
// ============================================================================
// Shared Types (declared locally in this file; presumably mirroring the core packages' shapes — verify)
// ============================================================================
/**
 * Action result from PageController operations.
 * Returned by the click / input / select / scroll / executeJavascript RPCs.
 */
export interface ActionResult {
success: boolean
message: string
}
/** Browser state for LLM consumption; returned by `rpc:getBrowserState`. */
export interface BrowserState {
url: string
title: string
header: string
content: string
footer: string
}
/**
 * Scroll options (payload of `rpc:scroll`).
 * NOTE(review): field semantics are not visible in this file; presumably
 * `down` selects the direction and `index` targets a specific element — confirm
 * against PageController.
 */
export interface ScrollOptions {
down: boolean
numPages: number
pixels?: number
index?: number
}
/**
 * Horizontal scroll options (payload of `rpc:scrollHorizontally`).
 * Unlike `ScrollOptions`, `pixels` is required here and there is no `numPages`.
 */
export interface ScrollHorizontallyOptions {
right: boolean
pixels: number
index?: number
}
/** Agent execution status */
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
/**
 * Agent activity for real-time UI feedback.
 * A union discriminated by `type`; each variant carries only the fields
 * relevant to that activity.
 */
export type AgentActivity =
| { type: 'thinking' }
| { type: 'executing'; tool: string; input: unknown }
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
| { type: 'retrying'; attempt: number; maxAttempts: number }
| { type: 'error'; message: string }
/**
 * Historical event (simplified for serialization).
 * A single flat shape covers every event `type`; the optional fields below are
 * grouped by the type they belong to. No fields are declared specifically for
 * 'user_takeover'.
 */
export interface HistoricalEvent {
type: 'step' | 'observation' | 'user_takeover' | 'error'
// For 'step' type
reflection?: {
evaluation_previous_goal?: string
memory?: string
next_goal?: string
}
action?: {
name: string
input: unknown
output: string
}
// For 'observation' type
content?: string
// For 'error' type
errorType?: 'retry' | 'error'
message?: string
}
/** Agent state snapshot: status, current task text and the full event history. */
export interface AgentState {
status: AgentStatus
task: string
history: HistoricalEvent[]
}
// ============================================================================
// RPC Protocol: Background <-> ContentScript
// Used by RemotePageController to call PageController methods
// ============================================================================
/**
 * Protocol map for PageController remote calls.
 * Each key is a message name; the function type gives the payload (parameter)
 * and the reply (return type) of that message.
 */
export interface PageControllerRPCProtocol {
// State queries
'rpc:getCurrentUrl': () => string
'rpc:getLastUpdateTime': () => number
'rpc:getBrowserState': () => BrowserState
// DOM operations
'rpc:updateTree': () => string
'rpc:cleanUpHighlights': () => void
// Element actions
'rpc:clickElement': (index: number) => ActionResult
// Multi-argument calls are packed into a single data object
'rpc:inputText': (data: { index: number; text: string }) => ActionResult
'rpc:selectOption': (data: { index: number; optionText: string }) => ActionResult
'rpc:scroll': (options: ScrollOptions) => ActionResult
'rpc:scrollHorizontally': (options: ScrollHorizontallyOptions) => ActionResult
'rpc:executeJavascript': (script: string) => ActionResult
// Mask operations
'rpc:showMask': () => void
'rpc:hideMask': () => void
// Lifecycle
'rpc:dispose': () => void
}
// ============================================================================
// Command Protocol: SidePanel -> Background
// Used by SidePanel UI to control the agent
// ============================================================================
/**
 * Protocol map for the commands the SidePanel sends to Background.
 * Only `agent:getState` replies with a value; the others reply with nothing.
 */
export interface AgentCommandProtocol {
// Task control
'agent:execute': (task: string) => void
'agent:stop': () => void
// State queries
'agent:getState': () => AgentState
// Configuration
'agent:configure': (config: { apiKey: string; baseURL: string; model: string }) => void
}
// ============================================================================
// Event Protocol: Background -> SidePanel
// Used by Background to push updates to SidePanel
// ============================================================================
/**
 * Protocol map for the events Background pushes to the SidePanel.
 * The message names match the `EVENT_TYPES` values used by the events module.
 */
export interface AgentEventProtocol {
'event:status': (status: AgentStatus) => void
'event:history': (history: HistoricalEvent[]) => void
'event:activity': (activity: AgentActivity) => void
'event:stateSnapshot': (state: AgentState) => void
}
// ============================================================================
// Messaging Instances
// ============================================================================
/**
 * RPC messaging for PageController remote calls
 * Background sends, ContentScript receives
 */
export const pageControllerRPC = defineExtensionMessaging<PageControllerRPCProtocol>()
/**
 * Command messaging for agent control
 * SidePanel sends, Background receives
 */
export const agentCommands = defineExtensionMessaging<AgentCommandProtocol>()
/**
 * Event messaging for agent updates
 * Background sends, SidePanel receives
 *
 * NOTE(review): the events module broadcasts with chrome.runtime.sendMessage
 * directly rather than through this instance — confirm whether this export is
 * still needed.
 */
export const agentEvents = defineExtensionMessaging<AgentEventProtocol>()

View File

@@ -0,0 +1,108 @@
/**
* RPC utilities for PageController remote calls
*
* This module provides helper functions for making RPC calls
* from Background to ContentScript with proper error handling.
*/
import { pageControllerRPC } from './protocol'
import type {
ActionResult,
BrowserState,
ScrollHorizontallyOptions,
ScrollOptions,
} from './protocol'
/**
 * Resolve the ID of the tab that is active in the current window.
 * Every RPC call is explicitly addressed to this tab.
 *
 * @throws Error('No active tab found') when the query yields no tab or the tab
 *   has no usable ID.
 */
async function getActiveTabId(): Promise<number> {
	const matchingTabs = await chrome.tabs.query({ active: true, currentWindow: true })
	const activeTabId = matchingTabs[0]?.id

	if (!activeTabId) {
		throw new Error('No active tab found')
	}
	return activeTabId
}
/**
 * Client-side facade over the `rpc:*` messages handled by the ContentScript.
 *
 * Every method resolves the active tab at call time, sends one message to
 * that tab through `pageControllerRPC`, and returns the handler's response.
 * Because the tab is looked up per call, consecutive calls can reach
 * different tabs if the active tab changes in between.
 */
export const rpcClient = {
  // ---- State queries ----

  /** Sends `rpc:getCurrentUrl` to the active tab. */
  async getCurrentUrl(): Promise<string> {
    return pageControllerRPC.sendMessage('rpc:getCurrentUrl', undefined, await getActiveTabId())
  },

  /** Sends `rpc:getLastUpdateTime` to the active tab. */
  async getLastUpdateTime(): Promise<number> {
    return pageControllerRPC.sendMessage('rpc:getLastUpdateTime', undefined, await getActiveTabId())
  },

  /** Sends `rpc:getBrowserState` to the active tab. */
  async getBrowserState(): Promise<BrowserState> {
    return pageControllerRPC.sendMessage('rpc:getBrowserState', undefined, await getActiveTabId())
  },

  // ---- DOM operations ----

  /** Sends `rpc:updateTree` to the active tab and returns its string result. */
  async updateTree(): Promise<string> {
    return pageControllerRPC.sendMessage('rpc:updateTree', undefined, await getActiveTabId())
  },

  /** Sends `rpc:cleanUpHighlights` to the active tab. */
  async cleanUpHighlights(): Promise<void> {
    return pageControllerRPC.sendMessage('rpc:cleanUpHighlights', undefined, await getActiveTabId())
  },

  // ---- Element actions ----

  /** Sends `rpc:clickElement` with the element index as payload. */
  async clickElement(index: number): Promise<ActionResult> {
    return pageControllerRPC.sendMessage('rpc:clickElement', index, await getActiveTabId())
  },

  /** Sends `rpc:inputText` with `{ index, text }` as payload. */
  async inputText(index: number, text: string): Promise<ActionResult> {
    const payload = { index, text }
    return pageControllerRPC.sendMessage('rpc:inputText', payload, await getActiveTabId())
  },

  /** Sends `rpc:selectOption` with `{ index, optionText }` as payload. */
  async selectOption(index: number, optionText: string): Promise<ActionResult> {
    const payload = { index, optionText }
    return pageControllerRPC.sendMessage('rpc:selectOption', payload, await getActiveTabId())
  },

  /** Sends `rpc:scroll`, forwarding the options object unchanged. */
  async scroll(options: ScrollOptions): Promise<ActionResult> {
    return pageControllerRPC.sendMessage('rpc:scroll', options, await getActiveTabId())
  },

  /** Sends `rpc:scrollHorizontally`, forwarding the options object unchanged. */
  async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
    return pageControllerRPC.sendMessage('rpc:scrollHorizontally', options, await getActiveTabId())
  },

  /** Sends `rpc:executeJavascript` with the script source string as payload. */
  async executeJavascript(script: string): Promise<ActionResult> {
    return pageControllerRPC.sendMessage('rpc:executeJavascript', script, await getActiveTabId())
  },

  // ---- Mask operations ----

  /** Sends `rpc:showMask` to the active tab. */
  async showMask(): Promise<void> {
    return pageControllerRPC.sendMessage('rpc:showMask', undefined, await getActiveTabId())
  },

  /** Sends `rpc:hideMask` to the active tab. */
  async hideMask(): Promise<void> {
    return pageControllerRPC.sendMessage('rpc:hideMask', undefined, await getActiveTabId())
  },

  // ---- Lifecycle ----

  /** Sends `rpc:dispose` to the active tab. */
  async dispose(): Promise<void> {
    return pageControllerRPC.sendMessage('rpc:dispose', undefined, await getActiveTabId())
  },
}
/** Shape of `rpcClient`, derived with `typeof` so it tracks the object's methods automatically. */
export type RPCClient = typeof rpcClient

View File

@@ -0,0 +1,25 @@
// Asset type declarations
// Ambient wildcard modules: an import whose path ends in one of these
// extensions type-checks, and its default export is typed as `string`.
// NOTE(review): presumably the bundler supplies the asset URL/path as that
// string at build time — confirm against the build setup.
declare module '*.webp' {
const src: string
export default src
}
declare module '*.png' {
const src: string
export default src
}
declare module '*.jpg' {
const src: string
export default src
}
declare module '*.jpeg' {
const src: string
export default src
}
declare module '*.svg' {
const src: string
export default src
}

View File

@@ -0,0 +1,10 @@
// Demo build (auto-init with demo LLM, for quick testing)
// jsDelivr URL for the `page-agent` npm package; no version is pinned in the path.
export const CDN_DEMO_URL = 'https://cdn.jsdelivr.net/npm/page-agent/dist/iife/page-agent.demo.js'
// Same bundle path served from registry.npmmirror.com under the `latest` tag.
export const CDN_DEMO_CN_URL =
'https://registry.npmmirror.com/page-agent/latest/files/dist/iife/page-agent.demo.js'
// Demo LLM for website testing
// NOTE(review): DEMO_MODEL and DEMO_API_KEY share the same placeholder string;
// presumably the proxy at DEMO_BASE_URL does not need a real key — confirm.
export const DEMO_MODEL = 'PAGE-AGENT-FREE-TESTING-RANDOM'
export const DEMO_BASE_URL =
'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
export const DEMO_API_KEY = 'PAGE-AGENT-FREE-TESTING-RANDOM'

View File

@@ -0,0 +1,292 @@
# PageAgentExt Architecture
This document describes the architecture of the Chrome extension version of PageAgent, including environment definitions, communication protocols, and extension considerations.
## Environment Definitions
The extension operates across three isolated JavaScript contexts:
### 1. Background (Service Worker)
**File:** `src/entrypoints/background.ts`
**Responsibilities:**
- Hosts the headless `PageAgentCore` instance
- Manages agent lifecycle (create, execute, stop, dispose)
- Stores LLM configuration in `chrome.storage.local`
- Receives commands from SidePanel via messaging
- Broadcasts events to SidePanel for UI updates
- Uses `RemotePageController` to proxy DOM operations to ContentScript
**Key Components:**
- `PageAgentCore` - The AI agent (from `@page-agent/core`)
- `RemotePageController` - Proxy that forwards calls to ContentScript
- Command handlers for `agent:execute`, `agent:stop`, `agent:configure`
### 2. Content Script
**File:** `src/entrypoints/content.ts`
**Responsibilities:**
- Runs in the context of web pages
- Hosts the real `PageController` instance
- Performs actual DOM operations (click, input, scroll, etc.)
- Responds to RPC messages from Background
- Manages visual mask overlay during automation
**Key Components:**
- `PageController` - DOM controller (from `@page-agent/page-controller`)
- RPC handlers for all PageController methods
### 3. Side Panel (React UI)
**Files:** `src/entrypoints/sidepanel/`
**Responsibilities:**
- Provides user interface for controlling the agent
- Displays task input and execution history
- Shows real-time agent activity (thinking, executing, etc.)
- Manages LLM configuration settings
- Sends commands to Background and receives event updates
**Key Components:**
- `App.tsx` - Main React component with chat-style UI
- `ConfigPanel` - Settings form for LLM configuration
- Event subscription for real-time updates
## Communication Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Side Panel │
│ ┌──────────────┐ ┌──────────────┐ ┌───────────────────────┐ │
│ │ Task Input │ │ Event Stream │ │ History Display │ │
│ └──────┬───────┘ └──────▲───────┘ └───────────────────────┘ │
└─────────┼─────────────────┼─────────────────────────────────────┘
│ Commands │ Events
▼ │
┌─────────────────────────────────────────────────────────────────┐
│ Background │
│ ┌──────────────────────────────────────────────────────────┐ │
│ │ PageAgentCore │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
│ │ │ LLM │ │ Tools │ │ RemotePageCtrl │ │ │
│ │ └─────────────┘ └─────────────┘ └────────┬─────────┘ │ │
│ └───────────────────────────────────────────────┼───────────┘ │
└───────────────────────────────────────────────────┼──────────────┘
│ RPC
┌─────────────────────────────────────────────────────────────────┐
│ Content Script │
│ ┌──────────────────────────────────────────────────────────┐ │
│ │ PageController │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
│ │ │ DOM Tree │ │ Actions │ │ Mask │ │ │
│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │
│ └───────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
┌───────────────┐
│ Web Page │
│ DOM │
└───────────────┘
```
## Message Protocol
All cross-context communication uses `@webext-core/messaging` for type safety.
### Protocol Definition
**File:** `src/messaging/protocol.ts`
### 1. RPC Protocol (Background → ContentScript)
Used by `RemotePageController` to call `PageController` methods.
```typescript
interface PageControllerRPCProtocol {
// State queries
'rpc:getCurrentUrl': () => string
'rpc:getLastUpdateTime': () => number
'rpc:getBrowserState': () => BrowserState
// DOM operations
'rpc:updateTree': () => string
'rpc:cleanUpHighlights': () => void
// Element actions
'rpc:clickElement': (index: number) => ActionResult
'rpc:inputText': (data: { index: number; text: string }) => ActionResult
'rpc:selectOption': (data: { index: number; optionText: string }) => ActionResult
'rpc:scroll': (options: ScrollOptions) => ActionResult
'rpc:scrollHorizontally': (options: ScrollHorizontallyOptions) => ActionResult
'rpc:executeJavascript': (script: string) => ActionResult
// Mask operations
'rpc:showMask': () => void
'rpc:hideMask': () => void
// Lifecycle
'rpc:dispose': () => void
}
```
### 2. Command Protocol (SidePanel → Background)
Used by SidePanel UI to control the agent.
```typescript
interface AgentCommandProtocol {
'agent:execute': (task: string) => void
'agent:stop': () => void
'agent:getState': () => AgentState
'agent:configure': (config: LLMConfig) => void
}
```
### 3. Event Protocol (Background → SidePanel)
Used by Background to push updates to SidePanel.
```typescript
interface AgentEventProtocol {
'event:status': (status: AgentStatus) => void
'event:history': (history: HistoricalEvent[]) => void
'event:activity': (activity: AgentActivity) => void
'event:stateSnapshot': (state: AgentState) => void
}
```
## Communication Flow
### Task Execution Flow
```
1. User enters task in SidePanel
└─> SidePanel sends 'agent:execute' command
2. Background receives command
├─> Creates PageAgentCore with RemotePageController
└─> Starts task execution
3. Agent executes step loop:
├─> LLM generates next action
├─> Agent calls RemotePageController method
│ └─> RPC message sent to ContentScript
├─> ContentScript executes on real PageController
│ └─> RPC response returned
├─> Agent updates history
└─> Background broadcasts events to SidePanel
4. SidePanel receives events
└─> Updates UI (status, history, activity)
5. Task completes or user stops
└─> Agent disposes, status changes to idle/completed/error
```
### Configuration Flow
```
1. User opens Settings in SidePanel
2. User enters API credentials
3. SidePanel sends 'agent:configure' command
4. Background saves config to chrome.storage.local
5. Next agent creation uses new config
```
## File Structure
```
packages/extension/src/
├── agent/
│ └── RemotePageController.ts # Proxy for PageController
├── entrypoints/
│ ├── background.ts # Service worker
│ ├── content.ts # Content script
│ └── sidepanel/
│ ├── index.html
│ ├── main.tsx
│ └── App.tsx # Main UI component
├── messaging/
│ ├── protocol.ts # Message type definitions
│ ├── rpc.ts # RPC client for PageController
│ ├── events.ts # Event broadcasting utilities
│ └── index.ts # Module exports
├── components/ui/ # shadcn components
├── lib/utils.ts # Utility functions
└── assets/index.css # Tailwind styles
```
## Extension Considerations
### Current Limitations (v1)
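1. **Single page control only** - Agent controls only the active tab of the current window. The target tab is resolved on every RPC call, so switching tabs mid-task redirects subsequent actions to the newly active tab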
2. **No cross-tab navigation** - Cannot follow links that open in new tabs
3. **Session-based** - Agent state is not persisted across extension restarts
### Future Extension Points
#### Multi-tab Control
To support controlling multiple tabs:
1. Add `tabId` parameter to RPC messages
2. Track tab-to-controller mapping in Background
3. Allow SidePanel to switch between controlled tabs
#### Persistent Sessions
To persist agent sessions:
1. Store session state in `chrome.storage.local`
2. Restore agent on extension startup
3. Handle service worker restarts gracefully
#### Cross-tab Navigation
To follow links in new tabs:
1. Listen to `chrome.tabs.onCreated` events
2. Inject content script into new tabs
3. Transfer control to new tab when navigation occurs
#### Screenshot/Vision Support
To add visual context for the agent:
1. Use `chrome.tabs.captureVisibleTab` for screenshots
2. Send images to vision-capable LLM models
3. Add screenshot tool to agent toolkit
## Security Considerations
1. **API Key Storage** - Keys stored in `chrome.storage.local` (extension-only access)
2. **Content Script Isolation** - Runs in isolated world, not accessible to page scripts
3. **Message Validation** - Only trusted extension contexts can send/receive messages
4. **Permission Scope** - Request minimal permissions needed for functionality
## Development
```bash
# Install dependencies
npm install
# Start development server
npm run dev
# Build for production
npm run build
# Package extension
npm run zip
```

View File

@@ -0,0 +1,27 @@
{
// Base config lives under .wxt/, which is git-ignored (see the repo .gitignore).
"extends": "./.wxt/tsconfig.json",
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
"useDefineForClassFields": true,
"noEmit": false,
"allowImportingTsExtensions": false,
"jsx": "react-jsx",
"baseUrl": ".",
"paths": {
// Self root
"@/*": ["src/*"],
// Sibling workspace packages are mapped straight to their TypeScript entry files.
"@page-agent/llms": ["../llms/src/index.ts"],
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"],
"@page-agent/core": ["../core/src/PageAgentCore.ts"],
"@page-agent/ui": ["../ui/src/index.ts"]
}
},
// Project references mirror the packages mapped in "paths" above.
"references": [
//
{ "path": "../llms" },
{ "path": "../page-controller" },
{ "path": "../core" },
{ "path": "../ui" }
]
}

View File

@@ -0,0 +1,29 @@
import tailwindcss from '@tailwindcss/vite'
import { defineConfig } from 'wxt'
// See https://wxt.dev/api/config.html
// WXT build configuration for the extension: source root, React module,
// Tailwind Vite plugin, and the manifest fields listed below.
export default defineConfig({
srcDir: 'src',
modules: ['@wxt-dev/module-react'],
// Function form, so the Vite config (and the Tailwind plugin instance) is created on call.
vite: () => ({
plugins: [tailwindcss()],
}),
manifest: {
name: 'Page Agent',
description: 'AI Agent for browser automation',
// NOTE(review): 'storage' and 'sidePanel' match the architecture doc (config in
// chrome.storage.local, side panel UI); confirm 'scripting' and 'activeTab' are
// still needed alongside the <all_urls> host permission.
permissions: ['tabs', 'activeTab', 'scripting', 'sidePanel', 'storage'],
host_permissions: ['<all_urls>'],
action: {
default_title: 'Open Page Agent',
},
side_panel: {
default_path: 'sidepanel/index.html',
},
// Exposes main-world.js to pages on every URL.
// NOTE(review): presumably a script injected into the page's main world — confirm.
web_accessible_resources: [
{
resources: ['main-world.js'],
matches: ['<all_urls>'],
},
],
},
})