docs: add new chapter files and update existing content
- Add new chapter files for frontend, backend, and assignments in stage-2 - Update stage-2 and stage-3 index pages with revised content and structure - Add new frontend design-to-code implementation guide - Update backend API development and Git workflow guides - Modify config.mjs to reflect new chapter additions and updates
This commit is contained in:
+106
-68
@@ -411,20 +411,36 @@ const stage2SidebarEn = [
|
||||
collapsed: false,
|
||||
items: [
|
||||
{
|
||||
text: 'Using Lovart for Assets',
|
||||
link: '/zh-cn/stage-2/frontend/2.0-lovart-assets/'
|
||||
text: 'Build Your Own Asset-Production Agent with Lovart',
|
||||
link: '/en/stage-2/frontend/2.0-lovart-assets/'
|
||||
},
|
||||
{
|
||||
text: 'Figma & MasterGo Basics',
|
||||
link: '/zh-cn/stage-2/frontend/2.1-figma-mastergo/'
|
||||
link: '/en/stage-2/frontend/2.1-figma-mastergo/'
|
||||
},
|
||||
{
|
||||
text: 'UI Design for Modern Apps',
|
||||
link: '/zh-cn/stage-2/frontend/2.2-ui-design/'
|
||||
text: 'Build Your First Modern App - UI Design',
|
||||
link: '/en/stage-2/frontend/2.2-ui-design/'
|
||||
},
|
||||
{
|
||||
text: 'UI Design Specs & Multi-Product',
|
||||
link: '/zh-cn/stage-2/frontend/2.3-multi-product-ui/'
|
||||
text: 'UI Design Guidelines and Multi-Product UI',
|
||||
link: '/en/stage-2/frontend/2.3-multi-product-ui/'
|
||||
},
|
||||
{
|
||||
text: 'Make Interfaces Beautiful with LLMs and Skills',
|
||||
link: '/en/stage-2/frontend/2.4-llm-skills-beautiful/'
|
||||
},
|
||||
{
|
||||
text: "Let's Build Hogwarts Portraits",
|
||||
link: '/en/stage-2/frontend/2.5-hogwarts-portraits/'
|
||||
},
|
||||
{
|
||||
text: 'From Design Prototype to Project Code',
|
||||
link: '/en/stage-2/frontend/2.6-design-to-code/'
|
||||
},
|
||||
{
|
||||
text: 'Upgrade Your UI with Modern Component Libraries',
|
||||
link: '/en/stage-2/frontend/2.7-modern-component-library/'
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -433,32 +449,28 @@ const stage2SidebarEn = [
|
||||
collapsed: false,
|
||||
items: [
|
||||
{
|
||||
text: 'Backend 1: What is API',
|
||||
link: '/zh-cn/stage-2/backend/2.1-what-is-api/'
|
||||
text: 'Learn Git and GitHub',
|
||||
link: '/en/stage-2/backend/2.4-git-workflow/'
|
||||
},
|
||||
{
|
||||
text: 'Backend 2: Database & Supabase',
|
||||
link: '/zh-cn/stage-2/backend/2.2-database-supabase/'
|
||||
text: 'From Database to Supabase',
|
||||
link: '/en/stage-2/backend/2.2-database-supabase/'
|
||||
},
|
||||
{
|
||||
text: 'Backend 3: AI-Assisted API Code',
|
||||
link: '/zh-cn/stage-2/backend/2.3-ai-interface-code/'
|
||||
text: 'Backend API Design and Development',
|
||||
link: '/en/stage-2/backend/2.3-ai-interface-code/'
|
||||
},
|
||||
{
|
||||
text: 'Backend 4: Git Workflow',
|
||||
link: '/zh-cn/stage-2/backend/2.4-git-workflow/'
|
||||
text: 'Ship Your Product Prototype',
|
||||
link: '/en/stage-2/backend/2.5-zeabur-deployment/'
|
||||
},
|
||||
{
|
||||
text: 'Backend 5: Deployment',
|
||||
link: '/zh-cn/stage-2/backend/2.5-zeabur-deployment/'
|
||||
text: 'From IDEs to CLI AI Coding Tools',
|
||||
link: '/en/stage-2/backend/2.6-modern-cli/'
|
||||
},
|
||||
{
|
||||
text: 'Backend 6: Modern CLI Tools',
|
||||
link: '/zh-cn/stage-2/backend/2.6-modern-cli/'
|
||||
},
|
||||
{
|
||||
text: 'Backend 7: Stripe Payment',
|
||||
link: '/zh-cn/stage-2/backend/2.7-stripe-payment/'
|
||||
text: 'Integrate Stripe and Other Billing Systems',
|
||||
link: '/en/stage-2/backend/2.7-stripe-payment/'
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -468,7 +480,7 @@ const stage2SidebarEn = [
|
||||
items: [
|
||||
{
|
||||
text: 'AI 1: Dify & Knowledge Base',
|
||||
link: '/zh-cn/stage-2/ai-capabilities/2.1-dify-knowledge-base/'
|
||||
link: '/en/stage-2/ai-capabilities/2.1-dify-knowledge-base/'
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -478,15 +490,15 @@ const stage2SidebarEn = [
|
||||
items: [
|
||||
{
|
||||
text: 'Hogwarts Portraits Project',
|
||||
link: '/zh-cn/stage-2/frontend/2.4-hogwarts-portraits/'
|
||||
link: '/en/stage-2/frontend/2.5-hogwarts-portraits/'
|
||||
},
|
||||
{
|
||||
text: 'Assignment 1: Full-Stack App',
|
||||
link: '/zh-cn/stage-2/assignments/2.1-fullstack-app/'
|
||||
link: '/en/stage-2/assignments/2.1-fullstack-app/'
|
||||
},
|
||||
{
|
||||
text: 'Assignment 2: UI Library & Trae',
|
||||
link: '/zh-cn/stage-2/assignments/2.2-modern-frontend-trae/'
|
||||
link: '/en/stage-2/assignments/2.2-modern-frontend-trae/'
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -494,76 +506,102 @@ const stage2SidebarEn = [
|
||||
|
||||
const stage3SidebarEn = [
|
||||
{
|
||||
text: 'Claude Code In-Depth',
|
||||
text: 'Core Skills',
|
||||
collapsed: false,
|
||||
items: [
|
||||
{
|
||||
text: 'Claude Code 快速上手核心指南',
|
||||
link: '/zh-cn/stage-3/core-skills/basics/'
|
||||
text: 'Claude Code Quickstart Core Guide',
|
||||
link: '/en/stage-3/core-skills/basics/'
|
||||
},
|
||||
{
|
||||
text: 'Claude Code MCP 完全指南',
|
||||
link: '/zh-cn/stage-3/core-skills/mcp/'
|
||||
text: 'MCP and Claude Code Complete Guide',
|
||||
link: '/en/stage-3/core-skills/mcp/'
|
||||
},
|
||||
{
|
||||
text: 'Claude Code Skills 完全指南',
|
||||
link: '/zh-cn/stage-3/core-skills/skills/'
|
||||
text: 'Claude Code Skills Complete Guide',
|
||||
link: '/en/stage-3/core-skills/skills/'
|
||||
},
|
||||
{
|
||||
text: 'Long Running Tasks',
|
||||
link: '/zh-cn/stage-3/core-skills/long-running-tasks/'
|
||||
text: 'Making Claude Code Work on Long-Running Tasks',
|
||||
link: '/en/stage-3/core-skills/long-running-tasks/'
|
||||
},
|
||||
{
|
||||
text: 'Claude Agent Teams 完全指南',
|
||||
link: '/zh-cn/stage-3/core-skills/agent-teams/'
|
||||
text: 'Claude Agent Teams Complete Guide',
|
||||
link: '/en/stage-3/core-skills/agent-teams/'
|
||||
},
|
||||
{
|
||||
text: 'Claude Code Superpowers 工程级开发',
|
||||
link: '/zh-cn/stage-3/core-skills/superpowers/'
|
||||
text: 'Claude Code Superpowers for Production-Grade Development',
|
||||
link: '/en/stage-3/core-skills/superpowers/'
|
||||
},
|
||||
{
|
||||
text: 'Claude Code 工作流最佳实践',
|
||||
link: '/zh-cn/stage-3/core-skills/workflow/'
|
||||
text: 'AI-Assisted Development Workflow',
|
||||
link: '/en/stage-3/core-skills/workflow/'
|
||||
},
|
||||
{
|
||||
text: 'Claude Code 手机远程开发',
|
||||
link: '/zh-cn/stage-3/core-skills/mobile-development/'
|
||||
text: 'Claude Code Remote Development on Mobile',
|
||||
link: '/en/stage-3/core-skills/mobile-development/'
|
||||
},
|
||||
{
|
||||
text: 'Claude Agent SDK 完全指南',
|
||||
link: '/zh-cn/stage-3/core-skills/claude-agent-sdk/',
|
||||
items: [
|
||||
text: 'Claude Agent SDK Complete Guide',
|
||||
link: '/en/stage-3/core-skills/claude-agent-sdk/'
|
||||
},
|
||||
{
|
||||
text: 'Spec Coding:规范驱动开发',
|
||||
link: '/zh-cn/stage-3/core-skills/claude-agent-sdk/spec-coding'
|
||||
}
|
||||
]
|
||||
text: 'From Vibe Coding to Spec Coding',
|
||||
link: '/en/stage-3/core-skills/spec-coding/'
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
text: 'Multi-Platform',
|
||||
text: 'Cross-Platform Development',
|
||||
collapsed: false,
|
||||
items: [
|
||||
{
|
||||
text: 'Advanced 3: WeChat Mini Program',
|
||||
link: '/zh-cn/stage-3/cross-platform/3.3-wechat-miniprogram/'
|
||||
text: 'How to Choose the Right Platform for Your App',
|
||||
link: '/en/stage-3/cross-platform/3.1-choose-platform/'
|
||||
},
|
||||
{
|
||||
text: 'Advanced 4: WeChat Mini Program (Backend)',
|
||||
link: '/zh-cn/stage-3/cross-platform/3.4-wechat-miniprogram-backend/'
|
||||
text: 'How to Build a Simple WeChat Mini Program',
|
||||
link: '/en/stage-3/cross-platform/3.3-wechat-miniprogram/'
|
||||
},
|
||||
{
|
||||
text: 'Advanced 5: Android App - Compose',
|
||||
link: '/zh-cn/stage-3/cross-platform/3.5-android-app/'
|
||||
text: 'How to Build a WeChat Mini Program with a Backend',
|
||||
link: '/en/stage-3/cross-platform/3.4-wechat-miniprogram-backend/'
|
||||
},
|
||||
{
|
||||
text: 'Advanced 6: iOS App - SwiftUI',
|
||||
link: '/zh-cn/stage-3/cross-platform/3.6-ios-app/'
|
||||
text: 'How to Build an Android App with Jetpack Compose',
|
||||
link: '/en/stage-3/cross-platform/3.5-android-app/'
|
||||
},
|
||||
{
|
||||
text: 'Advanced 7: Personal Website & Blog',
|
||||
link: '/zh-cn/stage-3/personal-brand/3.7-personal-website-blog/'
|
||||
text: 'How to Build an iOS App with SwiftUI',
|
||||
link: '/en/stage-3/cross-platform/3.6-ios-app/'
|
||||
},
|
||||
{
|
||||
text: 'How to Build a PWA Local App',
|
||||
link: '/en/stage-3/cross-platform/3.8-pwa-local-app/'
|
||||
},
|
||||
{
|
||||
text: 'How to Build a Browser AI Assistant Extension',
|
||||
link: '/en/stage-3/cross-platform/3.9-browser-ai-extension/'
|
||||
},
|
||||
{
|
||||
text: 'How to Build a Cross-Platform Electron Desktop App',
|
||||
link: '/en/stage-3/cross-platform/3.10-electron-voice-to-text/'
|
||||
},
|
||||
{
|
||||
text: 'How to Quickly Build and Mint an NFT',
|
||||
link: '/en/stage-3/cross-platform/3.11-nft-minting/'
|
||||
},
|
||||
{
|
||||
text: 'How to Build a VS Code Extension',
|
||||
link: '/en/stage-3/cross-platform/3.12-vscode-extension/'
|
||||
},
|
||||
{
|
||||
text: 'How to Build an Industrial Qt Desktop App',
|
||||
link: '/en/stage-3/cross-platform/3.13-qt-industrial-hmi/'
|
||||
},
|
||||
{
|
||||
text: 'How to Build Your Personal Website and Academic Blog',
|
||||
link: '/en/stage-3/personal-brand/3.7-personal-website-blog/'
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -572,12 +610,12 @@ const stage3SidebarEn = [
|
||||
collapsed: false,
|
||||
items: [
|
||||
{
|
||||
text: 'AI Advanced 1: RAG Introduction',
|
||||
link: '/zh-cn/stage-3/ai-advanced/3.a1-rag-introduction/'
|
||||
text: 'What Is RAG and How It Works',
|
||||
link: '/en/stage-3/ai-advanced/3.a1-rag-introduction/'
|
||||
},
|
||||
{
|
||||
text: 'AI Advanced 2: LangGraph & Advanced RAG',
|
||||
link: '/zh-cn/stage-3/ai-advanced/3.a2-langgraph-advanced-rag/'
|
||||
text: 'Advanced RAG and Workflow Orchestration with LangGraph',
|
||||
link: '/en/stage-3/ai-advanced/3.a2-langgraph-advanced-rag/'
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -2121,12 +2159,12 @@ Sitemap: ${siteUrl}/sitemap.xml
|
||||
},
|
||||
{
|
||||
text: 'Full-Stack Development',
|
||||
link: '/zh-cn/stage-2/',
|
||||
link: '/en/stage-2/',
|
||||
activeMatch: '/en/stage-2/'
|
||||
},
|
||||
{
|
||||
text: 'Advanced Development',
|
||||
link: '/zh-cn/stage-3/',
|
||||
link: '/en/stage-3/',
|
||||
activeMatch: '/en/stage-3/'
|
||||
},
|
||||
{
|
||||
|
||||
@@ -433,154 +433,233 @@ The reconstructed product is called "Diabetics Eat with Peace of Mind," position
|
||||
|
||||
The ordinary idea is aggregating news from various platforms, saving the trouble of opening them one by one. But Toutiao, Tencent News, etc., already do this well.
|
||||
|
||||
Xiao Ming horizontally segmented and found the investment research group: They need to track industry dynamics, but information is scattered; Need to quickly filter valuable information from massive news; Need to correlate news with stock price movements.
|
||||
Xiao Ming then did horizontal segmentation and found that financial analysts have a special need: they must track dynamics in specific industries, but information is too fragmented. He further deep-dived into their scenarios: in the morning they check overnight U.S. market moves and exchange-rate changes; during the day they track announcements and industry news for portfolio companies; in the afternoon they research potential targets and need large amounts of sector information.
|
||||
|
||||
The reconstructed product is called "Investment Research Intelligence Officer," positioned as an "intelligent assistant for investment researchers."
|
||||
The reconstructed product is called "Investment Research Intelligence Officer," positioned as an "information radar and decision assistant" for financial professionals.
|
||||
|
||||
---
|
||||
|
||||
#### Example 3: From "Campus Second-Hand Platform" to "Graduation Clearance Assistant"
|
||||
|
||||
The ordinary idea is a campus second-hand marketplace. But Xianyu and Zhuanzhuan are already very mature.
|
||||
|
||||
After horizontal segmentation, Xiao Ming found that graduates have a special need: they have too many things, and selling one by one is too troublesome. Deep-diving into their scenarios: they must leave campus within a week before graduation and do not have time to sell slowly; they do not know who needs their items; bargaining, delivery, and payment collection are all too cumbersome.
|
||||
|
||||
The reconstructed product is called "Graduation Clearance Assistant," positioned as a "move-out asset manager" for graduates.
|
||||
|
||||
---
|
||||
|
||||
### Act 2 Summary: Xiao Ming's Takeaways
|
||||
|
||||
Through Act 2, Xiao Ming understood:
|
||||
|
||||
**1. Start from yourself**
|
||||
- You yourself are a user, so you naturally understand that group's pain points
|
||||
- Interest is the best starting point, and passion is the best driving force
|
||||
|
||||
**2. Segment users horizontally**
|
||||
- Do not serve "everyone"; find "the segment with the strongest pain"
|
||||
- The more specific the segment, the greater the opportunity and the stronger the willingness to pay
|
||||
|
||||
**3. Deep-dive scenarios vertically**
|
||||
- Describe the complete user journey: before use, during use, after use
|
||||
- Find emotional touchpoints: fear, anxiety, helplessness, loneliness...
|
||||
|
||||
**4. Reconstruct value**
|
||||
- Upgrade from a "feature" to a "solution"
|
||||
- Upgrade from a "tool" to an "assistant/manager/companion"
|
||||
|
||||
---
|
||||
|
||||
📦 **Act Output:**
|
||||
- Found one precise target user group (postpartum moms)
|
||||
- Understood users' complete scenarios and real emotions
|
||||
- Reconstructed the product concept with a clear differentiation strategy
|
||||
|
||||
---
|
||||
|
||||
## Act 3: AI Dialogue Refinement
|
||||
|
||||
After finding the direction, Xiao Ming didn't rush to code. He knew that a good idea also needs a clear execution plan.
|
||||
Xiao Ming now had a clear product direction: a postpartum mom recovery assistant. But he still did not know exactly how to execute it, where to start, or how difficult the technical implementation would be.
|
||||
|
||||
He decided to use AI to help him refine the product concept.
|
||||
He decided to use AI to help refine the concept and turn the idea into an actionable plan.
|
||||
|
||||
### Using AI to Refine Product Concepts
|
||||
### Round 1: Present the raw idea
|
||||
|
||||
Xiao Ming opened ChatGPT and entered the following prompt:
|
||||
|
||||
```
|
||||
I want to create a "Postpartum Mom Recovery Assistant" product.
|
||||
Target users: Postpartum moms within 1 year
|
||||
Core pain point: Want to recover figure but no time, don't know how to exercise safely
|
||||
Main features:
|
||||
1. Fragmented training (10-15 minutes per session)
|
||||
2. Postpartum-specific courses
|
||||
3. AI movement correction
|
||||
4. Psychological support community
|
||||
|
||||
Please help me refine:
|
||||
1. Is this product concept complete? What's missing?
|
||||
2. What should be the MVP (Minimum Viable Product) features?
|
||||
3. What are the potential risks and challenges?
|
||||
4. How to validate market demand?
|
||||
```
|
||||
|
||||
AI gave detailed suggestions:
|
||||
|
||||
**1. Product Concept Completeness Analysis:**
|
||||
- Missing: User acquisition channels, retention mechanisms, monetization path
|
||||
- Suggestion: Add referral mechanism, achievement system, tiered membership
|
||||
|
||||
**2. MVP Features:**
|
||||
- Core: 10 postpartum recovery courses + simple movement guidance
|
||||
- Don't need: AI movement correction (high technical difficulty, can be added later)
|
||||
- Don't need: Community (need user base first)
|
||||
|
||||
**3. Potential Risks:**
|
||||
- Medical compliance risk: Postpartum recovery involves medical knowledge
|
||||
- User trust: Why would users trust your courses?
|
||||
- Competition: Keep might add postpartum features
|
||||
|
||||
**4. Market Validation Methods:**
|
||||
- Create a landing page, collect interested users' emails
|
||||
- Interview 20 postpartum moms, understand their real needs
|
||||
- Pre-sell course packages, test willingness to pay
|
||||
|
||||
### Using AI to Generate Business Plans
|
||||
|
||||
Xiao Ming continued to ask AI:
|
||||
|
||||
```
|
||||
Based on the above analysis, please help me generate a simple business plan, including:
|
||||
1. Product positioning and target users
|
||||
2. Core features and MVP scope
|
||||
3. Business model and monetization path
|
||||
4. Market validation plan
|
||||
5. Risk assessment and countermeasures
|
||||
```
|
||||
|
||||
AI generated a structured business plan that Xiao Ming could directly use to communicate with potential partners or investors.
|
||||
Xiao Ming told AI his idea: "I want to build a postpartum mom recovery assistant app to help postpartum moms recover their figure. But I worry about technical implementation difficulty, especially the AI movement-correction feature. Can one person do this?"
|
||||
|
||||
---
|
||||
|
||||
## Summary: The Complete Methodology for Finding Great Ideas
|
||||
### Round 2: AI helps plan the MVP
|
||||
|
||||
Through Xiao Ming's story, we learned a complete methodology:
|
||||
AI replied: "Great question! Let's create an MVP (Minimum Viable Product) plan."
|
||||
|
||||
### 1. Establish Judgment Criteria
|
||||
- Real needs = Users willing to pay + willing to change behavior + significant loss without solution
|
||||
- Priority: Pain points > Delight points > Itch points
|
||||
AI suggested a three-phase plan. Phase 1 is 1-2 months, and the goal is demand validation. Do not build a full app yet; first build a WeChat mini program that only provides postpartum-specific courses (videos) and a check-in community. Use manual guidance for movement feedback: users upload videos and coaches reply. The goal is to validate whether postpartum moms are willing to pay.
|
||||
|
||||
### 2. Discover Pain Points
|
||||
- Start from yourself and people around you
|
||||
- Horizontal segmentation: Find specific user groups
|
||||
- Vertical deep dive: Understand complete user scenarios
|
||||
Phase 2 is 3-6 months. If Phase 1 validates successfully, then add core features. Develop AI movement correction, possibly with existing SDKs such as Baidu human-pose recognition. Add personalized plans and optimize community functions.
|
||||
|
||||
### 3. Validate Needs
|
||||
- Talk to real users
|
||||
- Analyze existing alternatives
|
||||
- Test willingness to pay
|
||||
- Estimate market size
|
||||
- Consider competitive barriers
|
||||
Phase 3 is 6-12 months, for scaling. Build an independent app, cooperate with postpartum recovery institutions, and expand to pregnancy and pre-pregnancy scenarios.
|
||||
|
||||
### 4. Refine Product Concept
|
||||
- Use AI to help refine ideas
|
||||
- Define MVP scope
|
||||
- Develop business plan
|
||||
- Plan market validation
|
||||
For technical implementation, AI suggested using existing AI capabilities for movement correction, such as Baidu or Tencent APIs, instead of building from scratch, to reduce cost. In the early stage, use a hybrid mode of manual plus AI, then transition gradually.
|
||||
|
||||
### Key Takeaways
|
||||
For startup cost, mini program development is about 10,000-20,000 RMB if outsourced, or you can build it yourself with an AI IDE. For content creation, cooperate with postpartum recovery coaches. For promotion, start from mom groups and Xiaohongshu.
|
||||
|
||||
1. **Direction is more important than effort** — Wrong direction, the more you do, the more wrong
|
||||
2. **Real needs are voted on by users** — Not imagined by product managers
|
||||
3. **Segmentation is key** — Serving a small group well is better than pleasing everyone poorly
|
||||
4. **Validate early** — Don't invest heavily before validating demand
|
||||
5. **AI is your assistant** — Use AI to refine ideas, but the final judgment is yours
|
||||
Validation KPI: 100 users in the first month and a 10% paid conversion rate. If reached, demand is real and you continue investing; if not, adjust direction in time.
|
||||
|
||||
---
|
||||
|
||||
In the next chapter, we'll take our validated ideas and start learning how to use an AI IDE to turn them into interactive product prototypes.
|
||||
### Round 3: Xiao Ming raises concerns
|
||||
|
||||
## Final Act: Your Action Plan
|
||||
|
||||
### Memory Mantra
|
||||
|
||||
**Start from one person, one concrete task, one entry point; segment broadly, drill deeply; refine with AI; validate in five steps before building.**
|
||||
|
||||
Interpretation:
|
||||
|
||||
- **One person**: Start from a user group you naturally understand
|
||||
- **One task**: Focus on one concrete problem, not everything at once
|
||||
- **One entry point**: Find a sharp initial wedge
|
||||
- **Horizontal segmentation**: Identify the user subgroup with the strongest willingness to pay
|
||||
- **Vertical deep dive**: Understand the full user scenario and journey
|
||||
- **AI dialogue refinement**: Use AI to sharpen product concept and execution plan
|
||||
- **Five-step validation**: Verify demand before investing heavily
|
||||
After thinking, Xiao Ming asked again: "I am worried about content production. Postpartum recovery courses require professional guidance, and I do not have a medical background. How can I ensure course quality?"
|
||||
|
||||
---
|
||||
|
||||
### Post-class Exercise
|
||||
### Round 4: AI provides solutions
|
||||
|
||||
Pick one small but real annoyance from your daily life and apply this chapter's method:
|
||||
AI replied: "This is a common concern for technical founders, but there are solutions."
|
||||
|
||||
Solution 1 is to collaborate with professionals. Work with postpartum recovery coaches or hospital nutrition doctors. You provide the technology and product; they provide professional content. Use a revenue-sharing model.
|
||||
|
||||
Solution 2 is to integrate high-quality existing content. In the early stage, use public postpartum recovery videos, such as from YouTube or Bilibili. Do content review and curation first, then produce original content later.
|
||||
|
||||
Solution 3 is user-generated content. Encourage experienced moms to share their recovery methods, and use community voting to filter quality content, gradually building a content ecosystem.
|
||||
|
||||
AI recommended starting with Solution 1. Specific operation: find 2-3 postpartum recovery coaches and discuss cooperation; you build the product, they provide courses. Revenue split: you take 70%, they take 30%. This ensures quality while reducing cost. Early-stage course production cost can be zero with revenue sharing; you only invest development time and promotion cost.
|
||||
|
||||
**My suggestion: start with Solution 1**
|
||||
|
||||
Specific actions:
|
||||
1. Find 2-3 postpartum recovery coaches
|
||||
2. Propose cooperation: you build the product, they provide courses
|
||||
3. Revenue split: you take 70%, they take 30%
|
||||
4. This ensures content quality and lowers cost
|
||||
|
||||
**Early-stage cost:**
|
||||
- Course production: 0 RMB (revenue-share model)
|
||||
- You only need to invest development time and promotion cost
|
||||
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Round 5: Xiao Ming asks about promotion
|
||||
|
||||
Xiao Ming had one final concern: "Will promotion cost be high? How do I find postpartum moms?"
|
||||
|
||||
---
|
||||
|
||||
### Round 6: AI provides promotion strategy
|
||||
|
||||
AI replied: "This group is actually easy to find, and promotion cost can be very low."
|
||||
|
||||
The most recommended channel is Xiaohongshu. Search keywords like "postpartum recovery" and "diastasis recti," find related creators, and DM for collaboration. Offer "1 month free trial" in exchange for promotion, with near-zero cost.
|
||||
|
||||
Second is mom groups. Search WeChat groups and QQ groups. Join under the name of "postpartum recovery experience sharing," share useful content, do not post direct ads, and gradually build trust.
|
||||
|
||||
You can also cooperate with hospital obstetrics departments by providing free postpartum recovery guidance and distributing flyers in hospitals, with only a few hundred RMB printing cost. Or cooperate with maternal-and-infant stores, place promotional materials, and provide trial cards with purchases, with only trial-card production cost.
|
||||
|
||||
Validation metrics: in the first month, 100 users and 10 paid users (10% conversion rate), total promotion cost under 1000 RMB, and customer acquisition cost under 10 RMB per user. If these metrics are met, demand is real and you can continue investing.
|
||||
|
||||
---
|
||||
|
||||
### Final: Xiao Ming now has a clear plan
|
||||
|
||||
After 6 rounds of dialogue, Xiao Ming finally had a clear plan.
|
||||
|
||||
Phase 1 (1-2 months): build a WeChat mini program, cooperate with 2-3 postpartum recovery coaches (revenue share), provide only postpartum-specific courses (videos) and a check-in community, and use manual movement guidance. Target: 100 users and 10% paid conversion.
|
||||
|
||||
Phase 2 (3-6 months): if Phase 1 validates successfully, continue investing. Add AI movement correction, personalized plans, and optimize community features.
|
||||
|
||||
Phase 3 (6-12 months): develop an independent app, cooperate with postpartum recovery institutions, and expand to pregnancy and pre-pregnancy phases.
|
||||
|
||||
Startup cost is very low: development done by yourself using an AI IDE (0 RMB), content through coach revenue sharing (0 RMB in the early stage), and promotion via Xiaohongshu plus mom groups (under 1000 RMB). Total cost: under 1000 RMB.
|
||||
|
||||
---
|
||||
|
||||
### The 5-step method for AI dialogue refinement
|
||||
|
||||
From this case, Xiao Ming summarized a standard AI dialogue workflow (see Appendix E for details).
|
||||
|
||||
**Step 1: Present the raw idea.** Describe your initial idea, even if rough. Tell AI your concerns, such as heavy competition or unclear differentiation.
|
||||
|
||||
**Step 2: Ask AI to plan the MVP.** What should the minimum viable product include? How many phases? What are the goals in each phase? How difficult is implementation?
|
||||
|
||||
**Step 3: Raise your concerns.** Technical difficulty? Content production cost? Promotion cost? User acquisition difficulty? Tell AI all your concerns.
|
||||
|
||||
**Step 4: Ask AI for concrete solutions.** AI will provide specific suggestions for your concerns. Compare options and choose the best one. Estimate costs.
|
||||
|
||||
**Step 5: Finalize the plan.** Organize a clear action plan and set validation metrics. If targets are not met, adjust in time.
|
||||
|
||||
**Prompt template:**
|
||||
```text
|
||||
I want to build a [product concept],
|
||||
but I am worried about [your concern].
|
||||
Please help me:
|
||||
1. Plan an MVP
|
||||
2. Give concrete technical implementation suggestions
|
||||
3. Estimate cost
|
||||
4. Set validation metrics
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Act 3 Summary: Xiao Ming's Takeaways
|
||||
|
||||
Through Act 3, Xiao Ming understood three things.
|
||||
|
||||
**First, use AI dialogue to refine product concepts.** Do not expect one conversation to produce a perfect answer; iterate through multiple rounds. Tell AI your observations, experiences, and feedback from people around you. If any of AI's suggestions are unreasonable, point that out right away. Always end with a concrete action plan.
|
||||
|
||||
**Second, MVP core principles.** Keep it minimal, and only build the core function. Make it verifiable, so you can quickly validate whether demand is real. Keep it low cost, and validate with the smallest possible investment.
|
||||
|
||||
**Third, validation metrics.** Paid conversion > 10% means demand is real and worth investment. Paid conversion 5-10% means demand exists but needs refinement. Paid conversion < 5% means demand does not hold and direction should be adjusted.
|
||||
|
||||
---
|
||||
|
||||
📦 **Chapter Output:**
|
||||
- A clear MVP plan
|
||||
- A known technical implementation path
|
||||
- Defined validation metrics
|
||||
|
||||
---
|
||||
|
||||
## Final Act: Your Action
|
||||
|
||||
### Memory mantra
|
||||
|
||||
**Start from one person, one thing, one entry point. Segment horizontally, dig vertically, refine through AI dialogue, and only build after five-step validation.**
|
||||
|
||||
**Explanation:**
|
||||
- **One person:** Start from yourself because you naturally understand this group
|
||||
- **One thing:** Focus on one concrete thing and do not be greedy
|
||||
- **One entry point:** Find a sharp entry point, and the more segmented, the better
|
||||
- **Horizontal segmentation:** Find users with the strongest willingness to pay
|
||||
- **Vertical deep dive:** Understand users' complete journey
|
||||
- **AI dialogue:** Refine product concepts with AI dialogue
|
||||
- **Five-step validation:** Use the five-step method to validate demand authenticity
|
||||
|
||||
---
|
||||
|
||||
### Post-class exercise
|
||||
|
||||
Choose one small annoyance from your daily life and expand it using this chapter's method:
|
||||
|
||||
::: tip Exercise Task
|
||||
|
||||
**1. Describe the annoyance in one sentence**
|
||||
- Example: "I want to build a budgeting app to help users track spending."
|
||||
**1. Describe this annoyance** (in one sentence)
|
||||
- Example: "I want to build a bookkeeping app to help users record spending."
|
||||
|
||||
**2. Horizontal segmentation: list 3 user groups with different needs**
|
||||
**2. Horizontal segmentation: find 3 user groups that may have different needs**
|
||||
- Example: small business owners, parents of overseas students, freelancers
|
||||
|
||||
**3. Choose one user group and perform a vertical deep dive**
|
||||
- Example: parents of overseas students worry about spending but cannot see detailed usage
|
||||
**3. Select one group, then deep-dive vertically: describe their complete scenario and real emotions**
|
||||
- Example: scenario of overseas-student parents - they want to know how much their child spends abroad, but the child does not tell them
|
||||
|
||||
**4. Reframe product concept: from one feature to one solution**
|
||||
- Example: "Overseas Spending Steward" instead of a generic bookkeeping app
|
||||
**4. Reconstruct product concept: evolve from "one feature" into "one solution"**
|
||||
- Example: "Overseas Spending Steward" - not just bookkeeping, but giving parents confidence and visibility into overseas spending
|
||||
|
||||
**5. Evaluate your idea with the validation checklist (see Appendix F)**
|
||||
**5. Evaluate your idea with the validation checklist** (see Appendix F)
|
||||
|
||||
Share your analysis with the community and discuss with others.
|
||||
**Share your analysis in the community and discuss with other learners!**
|
||||
|
||||
:::
|
||||
|
||||
@@ -588,251 +667,420 @@ Share your analysis with the community and discuss with others.
|
||||
|
||||
## Appendix: SOP Methodology
|
||||
|
||||
### Appendix A: 5-Step Demand Validation Method
|
||||
### Appendix A: 5-Step judgment method for need analysis
|
||||
|
||||
When you have an idea, how do you quickly judge whether it is worth building?
|
||||
When you have an idea, how can you quickly judge whether it is worth investing in?
|
||||
|
||||
**Step 1: User validation (find 10 target users)**
|
||||
**Step 1: User validation - find 10 target users**
|
||||
|
||||
Do **not** ask: "Would you use my product?"
|
||||
**Do not ask:** "Will you use my product?" (false-positive rate is around 90%)
|
||||
|
||||
Ask:
|
||||
1. "How do you solve this problem today?" (real behavior)
|
||||
2. "How many times did this bother you in the last week?" (frequency)
|
||||
3. "How much time/money do you currently spend to solve it?" (willingness to pay)
|
||||
4. "If a solution requires behavior change, would you do it?" (switching cost)
|
||||
**Ask instead:**
|
||||
1. "How do you currently solve this problem?" (understand real behavior)
|
||||
2. "How many times did this problem bother you in the last week?" (understand frequency)
|
||||
3. "How much money/time did you spend to solve it?" (understand willingness to pay)
|
||||
4. "If there is a solution but it requires changing habits, are you willing?" (understand change cost)
|
||||
|
||||
Evaluation hints:
|
||||
- If 3+ users say "this hurts me every day," it may be a pain point
|
||||
- If users say "interesting, but not urgent," it's likely an itch point
|
||||
- If users already use alternatives but remain unsatisfied, there is a real opening
|
||||
**Decision criteria:**
|
||||
- If more than 3 users say "this gives me a headache every day" - it may be a pain point
|
||||
- If users say "interesting, but not urgent" - most likely an itch point
|
||||
- If users say "I currently use XX, but not satisfied" - there is opportunity
|
||||
|
||||
**Key question:** what alternative are users using now?
|
||||
**Key question:** what method do users currently use to solve this problem?
|
||||
|
||||
| Alternative Type | What It Means | Opportunity |
|
||||
| Alternative Type | Description | Opportunity Assessment |
|
||||
|------------|------|---------|
|
||||
| No alternative | Users endure silently | Big opportunity, but requires market education |
|
||||
| Very manual workaround | Excel/manual/multi-person patchwork | Good opportunity |
|
||||
| Tool stitching | A + B + C tools combined | Good integration opportunity |
|
||||
| Mature product, dissatisfied | Existing gap in experience | Opportunity with differentiation |
|
||||
| Mature product, satisfied | Need disruptive edge | Low probability opportunity |
|
||||
| **No alternative** | Users silently endure | Big opportunity, but market education is required |
|
||||
| **Using clumsy methods** | Excel, manual work, multi-person collaboration | Good opportunity, users want better solutions |
|
||||
| **Combining multiple tools** | Tool A + Tool B + Tool C | Good opportunity, integration has value |
|
||||
| **Using mature products** | But users are unsatisfied | Opportunity exists, but differentiation is needed |
|
||||
| **Using mature products** | Users are satisfied | Very small opportunity unless there is disruptive innovation |
|
||||
|
||||
::: tip What is "disruptive innovation"?
|
||||
It is not just "better features"; it is usually a simpler/cheaper way to serve ignored users, then expanding upward over time.
|
||||
|
||||
**Simple definition:** not making products incrementally better, but serving previously overlooked user groups with a simpler/cheaper approach.
|
||||
|
||||
**Examples:**
|
||||
- Traditional phones -> smartphones (not just more functions, but a completely different interaction model)
|
||||
- Traditional taxis -> Didi/Uber (not better cars, but on-demand ride calling anywhere)
|
||||
- Traditional bookstores -> e-books (not more books, but easier carrying and purchasing)
|
||||
|
||||
**Key point:** disruptive innovation often starts from low-end markets or new user groups, and then gradually moves upward.
|
||||
|
||||
:::
|
||||
|
||||
**Most direct validation method: pre-order/deposit**
|
||||
**Cases:**
|
||||
- Diabetics currently control diet by "experience + guessing" (very clumsy method) -> big opportunity
|
||||
- Ordinary dieters use Bohe Health (mature product, medium satisfaction) -> opportunity for vertical segmentation
|
||||
- Students use WeChat groups for second-hand trading (multiple tools stitched together) -> opportunity for integration
|
||||
|
||||
1. Build a simple landing page
|
||||
2. Add a pre-order or reservation button
|
||||
3. Measure how many users actually pay (even a small deposit counts)
|
||||
**Most effective method: presale or deposit**
|
||||
|
||||
Decision threshold:
|
||||
- Deposit conversion > 10%: demand is likely real
|
||||
- Deposit conversion 5%-10%: demand exists but product needs refinement
|
||||
- Deposit conversion < 5%: concept likely not working yet
|
||||
**Steps:**
|
||||
1. Create a simple landing page and describe your product concept
|
||||
2. Add a "presale" or "reservation" button
|
||||
3. See how many people are willing to pay (even 1 RMB counts)
|
||||
|
||||
Simple market formula:
|
||||
**Decision criteria:**
|
||||
- Users willing to pay deposit > 10%: demand is real and worth doing
|
||||
- 5%-10%: demand exists but needs refinement
|
||||
- < 5%: demand may not be valid, or product concept has issues
|
||||
|
||||
**Note:** many people say "I will buy." The people who actually pay are your real target users.
|
||||
|
||||
**Simple formula:**
|
||||
```text
|
||||
Market size = target user count × willingness to pay × average order value
|
||||
Potential market size = target user count × willingness to pay × average order value
|
||||
```
|
||||
|
||||
**Early-stage moat reality:**
|
||||
- Most early products do not start with a strong moat
|
||||
- That's normal; speed of execution matters first
|
||||
- Build traction first, then strengthen barriers
|
||||
**Case: campus second-hand trading platform**
|
||||
- Target users: 40 million college students in China
|
||||
- With second-hand trading demand: 50% = 20 million
|
||||
- Willing to use platform: 10% = 2 million
|
||||
- Annual transaction frequency: 2 times
|
||||
- Platform commission: 5%
|
||||
- Average order value: 100 RMB
|
||||
- Potential market size = 2,000,000 × 2 × 100 × 5% = 20 million RMB/year
|
||||
|
||||
**Decision criteria:**
|
||||
- Market size > 1 billion RMB: large track, worth pursuing
|
||||
- 100 million-1 billion RMB: medium/small track, possible but ceiling is visible
|
||||
- < 100 million RMB: niche market, suitable for a side business or a small but well-crafted ("small and beautiful") business
|
||||
|
||||
**Key question:** if the product succeeds, what happens when others copy it?
|
||||
|
||||
**Common moat types:**
|
||||
|
||||
| Moat Type | Description | Example |
|
||||
|-----------|------|------|
|
||||
| **Network effects** | More users -> more product value | WeChat, Didi |
|
||||
| **Data accumulation** | More data -> better algorithm | Toutiao, Douyin |
|
||||
| **Brand recognition** | Occupying user mindshare | Coca-Cola, Nike |
|
||||
| **Scale effects** | Larger scale -> lower costs | JD logistics, Amazon |
|
||||
| **Technical patents** | Core technology barriers | Huawei, DJI |
|
||||
| **Switching costs** | High migration cost for users | Enterprise software, operating systems |
|
||||
|
||||
**Early-stage reality:**
|
||||
- Most early projects do not have clear moats
|
||||
- But that is fine; the key is to **move fast**
|
||||
- Occupy market first, then build barriers
|
||||
|
||||
---
|
||||
|
||||
### Appendix B: Horizontal Segmentation Method
|
||||
### Appendix B: Horizontal user-segmentation method
|
||||
|
||||
Do not target "all X users." Choose one specific segment with sharper pain and clearer willingness to pay.
|
||||
Do not try to serve "all XX users." Instead, find **one specific group** with sharper and more concrete needs.
|
||||
|
||||
Steps:
|
||||
1. List all possible subsegments
|
||||
2. Evaluate each segment
|
||||
3. Pick one and go deep
|
||||
**Step 1: List all possible segmented user groups**
|
||||
|
||||
| Evaluation Dimension | Explanation |
|
||||
For your product concept, list all possible user groups.
|
||||
|
||||
**Step 2: Evaluate the business value of each group**
|
||||
|
||||
| Evaluation Dimension | Description |
|
||||
|---------|------|
|
||||
| Pain intensity | Pain point or itch point? |
|
||||
| Willingness to pay | How much would they pay? |
|
||||
| Market size | How many users are there? |
|
||||
| Competition | Are existing solutions already good enough? |
|
||||
| Your understanding | Do you understand this group and have access channels? |
|
||||
| Pain intensity | Is this group's need a pain point or itch point? |
|
||||
| Willingness to pay | How much are they willing to pay for a solution? |
|
||||
| Market size | How many people are in this group? |
|
||||
| Competition level | Are current solutions satisfactory? |
|
||||
| Your understanding of this group | Do you understand this group? Do you have access channels? |
|
||||
|
||||
Selection priority:
|
||||
- Highest pain intensity
|
||||
- Strongest payment intention
|
||||
- Best domain understanding from you
|
||||
- Relatively lower competitive pressure
|
||||
**Step 3: Choose one group for deep analysis**
|
||||
|
||||
Choose the one that is:
|
||||
- most painful
|
||||
- highest willingness to pay
|
||||
- best understood by you
|
||||
- relatively less competitive
|
||||
|
||||
::: tip Segmentation Example
|
||||
|
||||
**Product concept:** bookkeeping app
|
||||
|
||||
| Segmented Group | Pain Point | Willingness to Pay | Market Size | Competition |
|
||||
|---------|------|---------|---------|---------|
|
||||
| Ordinary office workers | Recording is troublesome | Low | Large | High |
|
||||
| Small business owners | Personal/company spending is mixed up | High | Medium | Medium |
|
||||
| Freelancers | Unstable income, need cash-flow forecast | High | Medium | Medium |
|
||||
| Parents of overseas students | Want to know child's spending but child does not say | High | Small | Low |
|
||||
|
||||
**Chosen segment:** parents of overseas students (strongest pain point, high willingness to pay, relatively low competition)
|
||||
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Appendix C: Vertical Scenario Deep-Dive Method
|
||||
### Appendix C: Vertical scenario deep-dive method
|
||||
|
||||
After choosing a segment, do not stop at features. Understand the full context and emotional journey.
|
||||
After finding the user group, do not stop at a single feature. You need to understand the user's **complete scenario**.
|
||||
|
||||
Steps:
|
||||
1. Describe the user's day end-to-end
|
||||
2. Find pain points in each scenario
|
||||
3. Identify emotional triggers (fear, anxiety, helplessness, loneliness, frustration)
|
||||
4. Rebuild product value around scenario + emotion
|
||||
**Step 1: Describe one full day of the user**
|
||||
|
||||
The goal is to evolve from "tool feature" to "complete solution."
|
||||
From morning to night, describe the complete scenario in which the user interacts with your product.
|
||||
|
||||
**Step 2: Analyze pain points in each scenario**
|
||||
|
||||
In each scenario, what problems does the user encounter? What emotions appear?
|
||||
|
||||
**Step 3: Find emotional touchpoints**
|
||||
|
||||
Fear, anxiety, helplessness, loneliness, anger, regret...
|
||||
|
||||
**Step 4: Reconstruct value**
|
||||
|
||||
Based on scenarios and emotions, reconstruct product value.
|
||||
|
||||
::: tip Deep-Dive Example
|
||||
|
||||
**User group:** postpartum moms
|
||||
|
||||
| Time | Scenario | Pain Point | Emotion |
|
||||
|------|------|------|------|
|
||||
| 6 AM | Baby just fell asleep, 30 minutes free | Do not know what movement is safe | Fear |
|
||||
| 10 AM | Holding baby to help sleep, lower back soreness | Hands occupied, wants recovery exercise | Anxiety |
|
||||
| 3 PM | Baby sleeping, wants to exercise | Body is tired, unsure whether she can continue | Helplessness |
|
||||
| 8 PM | Finally has time | Sees body in mirror and feels life is over | Depression |
|
||||
| Long term | No one understands | Feels like she is the only one suffering this much | Loneliness |
|
||||
|
||||
**Reconstructed value:** upgrade from "fitness tool" to "rehab coach + psychological supporter"
|
||||
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Appendix D: More Examples of Idea Reframing
|
||||
### Appendix D: More examples from ordinary ideas to great ideas
|
||||
|
||||
#### Example 1: From "Bookkeeping App" to "Overseas Spending Steward"
|
||||
#### Example 1: From "bookkeeping app" to "Overseas Spending Steward"
|
||||
|
||||
- Generic idea: automatic spending categorization
|
||||
- Core problem shift: not bookkeeping, but parents' loss of control over overseas spending visibility
|
||||
- Reframed value: real-time child spending sync + overspending alerts + monthly analysis + peer benchmark insights
|
||||
**Ordinary idea:** automatic bookkeeping app, connecting bank cards to auto-categorize spending
|
||||
|
||||
#### Example 2: From "Pomodoro Tool" to "Remote Work Proof"
|
||||
**Problem:** there are already SuiShouJi, WaCai, Alipay bills...
|
||||
|
||||
- Generic idea: focus timer
|
||||
- Core problem shift: not focus itself, but trust gap between remote worker and manager
|
||||
- Reframed value: time tracking + productivity reports + privacy-safe activity summaries + auto daily report
|
||||
**Horizontal segmentation:**
|
||||
- Parents of overseas students: want to know how much their child spends abroad and whether they overspend
|
||||
|
||||
#### Example 3: From "Used Book Marketplace" to "Picture Book Subscription Library"
|
||||
**Vertical deep dive:**
|
||||
- Pain point is not bookkeeping but **"loss of control"** - do not know how much the child spends or where money goes
|
||||
- Scenario: every month parents see credit-card bills, but the child never proactively explains spending
|
||||
|
||||
- Generic idea: used book trading
|
||||
- Core problem shift: short lifecycle of kids' picture books
|
||||
- Reframed value: monthly age-fit picture book rotation + progress tracking + hygiene guarantee
|
||||
**Reconstructed concept:** "Overseas Spending Steward" - not only bookkeeping, but letting parents "have clear visibility" on overseas spending
|
||||
|
||||
**Core features:**
|
||||
- Real-time child spending sync
|
||||
- Overspending alerts
|
||||
- Monthly spending analysis reports
|
||||
- Peer comparison among similar students ("your child spends 20% above average")
|
||||
|
||||
---
|
||||
|
||||
### Appendix E: 5-Step AI Dialogue Method for Concept Refinement
|
||||
#### Example 2: From "Pomodoro tool" to "Remote Work Proof"
|
||||
|
||||
Use multi-round AI dialogue to turn rough ideas into executable plans.
|
||||
**Ordinary idea:** Pomodoro app to help users focus
|
||||
|
||||
**Step 1: Submit raw idea and concerns**
|
||||
**Problem:** phones already have screen-time stats, plus Forest and Pomodoro Todo...
|
||||
|
||||
**Horizontal segmentation:**
|
||||
- Remote workers: need to prove to managers that they are truly working
|
||||
|
||||
**Vertical deep dive:**
|
||||
- Pain point is not "cannot focus," but **"trust crisis"** - if manager cannot see me, how do I prove I am working?
|
||||
- Scenario: every day after work, manager asks "how was your progress today?" and there is no proof
|
||||
|
||||
**Reconstructed concept:** "Remote Work Proof" - helping remote workers build trust with employers
|
||||
|
||||
**Core features:**
|
||||
- Automatic work-time tracking
|
||||
- Productivity reports
|
||||
- Screen activity summaries (privacy-protected)
|
||||
- Auto-generated daily work report sent to supervisor
|
||||
|
||||
---
|
||||
|
||||
#### Example 3: From "second-hand book trading" to "Picture Book Library"
|
||||
|
||||
**Ordinary idea:** second-hand book trading platform
|
||||
|
||||
**Problem:** there are already Duozhuayu, Xianshu, and Kongfuzi used-book marketplaces...
|
||||
|
||||
**Horizontal segmentation:**
|
||||
- Mom users: children's picture books become idle after reading, but buying new books is expensive
|
||||
|
||||
**Vertical deep dive:**
|
||||
- Pain point is not "books are expensive," but **"short lifecycle of picture books"** - books for age 3 are not read at age 4
|
||||
- Scenario: home is full of picture books that children no longer read, but throwing them away feels wasteful
|
||||
|
||||
**Reconstructed concept:** "Picture Book Library delivered to your home" - not selling used books, but providing "rental of usage rights"
|
||||
|
||||
**Core features:**
|
||||
- Picture book subscription (mail 5 age-appropriate books each month, return after reading, then rotate new ones)
|
||||
- Reading progress tracking
|
||||
- Age-appropriate recommendations
|
||||
- Sterilization guarantee
|
||||
|
||||
---
|
||||
|
||||
### Appendix E: 5-step method to refine product concepts via AI dialogue
|
||||
|
||||
Use multi-round AI dialogue to gradually refine ordinary ideas into precise, executable product concepts.
|
||||
|
||||
**Operation:**
|
||||
- Describe your initial idea (even if rough)
|
||||
- Tell AI your concerns (heavy competition, unclear differentiation, etc.)
|
||||
|
||||
**Prompt:**
|
||||
```text
|
||||
I want to build [product concept], but I found [problem/concern].
|
||||
I want to build [product concept],
|
||||
but I found [problem/concern].
|
||||
```
|
||||
|
||||
**Step 2: Ask AI for MVP plan**
|
||||
**Operation:**
|
||||
- Ask AI to create a minimum viable product plan
|
||||
- Discuss implementation difficulty and costs
|
||||
- Define validation metrics
|
||||
|
||||
**Prompt:**
|
||||
```text
|
||||
Please help me:
|
||||
1. Plan an MVP
|
||||
2. Provide concrete technical suggestions
|
||||
2. Provide concrete technical implementation advice
|
||||
3. Estimate cost
|
||||
4. Define validation metrics
|
||||
```
|
||||
|
||||
**Step 3: Raise concrete concerns**
|
||||
**Operation:**
|
||||
- Technical difficulty?
|
||||
- Content production cost?
|
||||
- Promotion cost?
|
||||
- User acquisition difficulty?
|
||||
|
||||
**Prompt:**
|
||||
```text
|
||||
I worry about:
|
||||
I am worried about:
|
||||
1. [Concern 1]
|
||||
2. [Concern 2]
|
||||
3. [Concern 3]
|
||||
```
|
||||
|
||||
**Step 4: Request option comparison and solutions**
|
||||
**Operation:**
|
||||
- Provide concrete solutions for your concerns
|
||||
- Compare multiple options and choose the best
|
||||
- Estimate costs
|
||||
|
||||
**Prompt:**
|
||||
```text
|
||||
Please provide specific solutions for my concerns.
|
||||
Please provide concrete solutions for my concerns.
|
||||
```
|
||||
|
||||
**Step 5: Confirm final action plan**
|
||||
**Operation:**
|
||||
- Organize a clear action plan
|
||||
- Set validation metrics
|
||||
- If metrics are not met, adjust direction quickly
|
||||
|
||||
**Prompt:**
|
||||
```text
|
||||
Please help me organize a clear action plan.
|
||||
```
|
||||
|
||||
::: tip Key Techniques
|
||||
- Multi-round iteration: don't expect one perfect answer
|
||||
- Add context: your observations, lived experience, user feedback
|
||||
- Challenge weak AI suggestions when needed
|
||||
- Always end with an actionable execution plan
|
||||
::: tip Key techniques
|
||||
|
||||
- **Multi-round dialogue:** do not expect a perfect answer in one round; iterate
|
||||
- **Provide information:** tell AI your observations, experiences, and feedback from people around you
|
||||
- **Challenge AI:** if AI suggestions are unreasonable, call that out in time
|
||||
- **Focus on execution:** always end with a concrete action plan
|
||||
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Appendix F: Demand Validation Checklist
|
||||
### Appendix F: Need validation checklist
|
||||
|
||||
Before investing development time, verify this checklist. The core question is: **will users pay for this?**
|
||||
Before deciding to invest development time, validate your idea with the checklist below - **the core question is always: will users pay for this?**
|
||||
|
||||
::: tip Need Validation Checklist
|
||||
|
||||
::: tip Validation Checklist
|
||||
**1. User profile clarity**
|
||||
- ☐ Can you describe the target user in one sentence?
|
||||
- ☐ Do you know their current alternative?
|
||||
- ☐ Can you describe specific usage scenarios?
|
||||
- ☐ Does this group have purchasing power?
|
||||
- ☐ Can you describe your target user in one sentence?
|
||||
- ☐ Can you state what alternative they currently use?
|
||||
- ☐ Can you describe specific details of their usage scenario?
|
||||
- ☐ Does this user group have payment capability?
|
||||
|
||||
**2. Pain intensity**
|
||||
- ☐ What cost do users pay now (time/money/effort)?
|
||||
- ☐ What happens if this problem stays unsolved?
|
||||
- ☐ Are users actively searching for solutions?
|
||||
- ☐ How much are they willing to pay?
|
||||
**2. Pain intensity evaluation**
|
||||
- ☐ What cost do users pay now to solve this problem? (time/money/effort)
|
||||
- ☐ If they do not solve it, what consequence follows?
|
||||
- ☐ Are users actively seeking solutions?
|
||||
- ☐ How much are users willing to pay for this?
|
||||
|
||||
**3. Differentiation**
|
||||
- ☐ What is your advantage over existing options?
|
||||
- ☐ Is that advantage strong enough to trigger switching?
|
||||
- ☐ Is your capability hard to copy quickly?
|
||||
- ☐ Is differentiation strong enough to support payment?
|
||||
**3. Solution differentiation**
|
||||
- ☐ Compared with existing solutions, what is your advantage?
|
||||
- ☐ Is that advantage strong enough to make users switch?
|
||||
- ☐ How hard is it for big platforms to copy your feature?
|
||||
- ☐ Is your differentiation enough to support paid conversion?
|
||||
|
||||
**4. Business viability**
|
||||
- ☐ Will users really pay? How much? (must be tested)
|
||||
- ☐ What is approximate CAC?
|
||||
- ☐ Can LTV cover CAC?
|
||||
- ☐ Do you have additional monetization paths?
|
||||
**4. Business model feasibility**
|
||||
- ☐ Are users willing to pay? How much? (must be tested in reality)
|
||||
- ☐ What is the rough customer acquisition cost?
|
||||
- ☐ Can user lifetime value (LTV) cover customer acquisition cost (CAC)?
|
||||
- ☐ Are there additional monetization paths? (ads, value-added services, B2B, etc.)
|
||||
|
||||
**5. Rapid validation plan**
|
||||
- ☐ Can you build a testable prototype with minimum cost in 1-2 weeks?
|
||||
- ☐ Can you find 10 target users for interviews?
|
||||
- ☐ Can you design an experiment to validate the core hypothesis?
|
||||
- ☐ Can you ask users to prepay deposits to validate willingness to pay?
|
||||
|
||||
**5. Rapid validation**
|
||||
- ☐ Can you build a testable prototype in 1-2 weeks with minimum cost?
|
||||
- ☐ Can you interview 10 target users?
|
||||
- ☐ Can you design an experiment to test the core hypothesis?
|
||||
- ☐ Can you test willingness to pay with real deposits?
|
||||
:::
|
||||
|
||||
Do not ask "Would you use this?" It usually gives false positives.
|
||||
**Do not ask "Will you use this product?"**
|
||||
This question mostly gives false positives.
|
||||
|
||||
Ask behavior-first questions:
|
||||
- "How do you solve this now?"
|
||||
- "How many times did this bother you in the last week?"
|
||||
- "If the solution requires behavior change, will you change?"
|
||||
- "Would you buy at price X?"
|
||||
**Ask instead:**
|
||||
- "How do you currently solve this problem?" (understand real behavior)
|
||||
- "How many times did this problem bother you in the last week?" (understand frequency)
|
||||
- "If there is a solution, but it requires changing your current habit, are you willing?" (understand change cost)
|
||||
- "If it costs XX RMB, will you buy?" (understand willingness to pay)
|
||||
|
||||
Best validation is still real prepaid commitment.
|
||||
**Best validation:** ask users to prepay deposits. Many people say they are willing to pay, but those who actually pay are your real target users.
|
||||
|
||||
**Key metrics:**
|
||||
- Deposit-paying user ratio > 10%: demand is real and worth investment
|
||||
- Deposit-paying ratio 5%-10%: demand exists but needs refinement
|
||||
- Deposit-paying ratio < 5%: demand is invalid, or product concept has issues
|
||||
|
||||
---
|
||||
|
||||
## Chapter Summary
|
||||
|
||||
In this chapter, through Xiao Ming's story, we learned to evaluate ideas with a product-manager mindset. The core question remained unchanged: **will users pay for this?**
|
||||
In this chapter, through Xiao Ming's story, we learned how to evaluate product ideas from a product-manager perspective - **the core is always: will users pay for this?**
|
||||
|
||||
::: info Core Points
|
||||
**1. Three standards of real demand**
|
||||
- Users are willing to pay
|
||||
- Users are willing to change behavior
|
||||
- Without a solution, users experience clear loss
|
||||
::: info Core points
|
||||
|
||||
**2. Path from ordinary idea to paid product**
|
||||
- Horizontal segmentation: find specific users with stronger payment intent
|
||||
- Vertical deep dive: understand complete scenario and emotional context
|
||||
- Value reframing: evolve from feature/tool to complete solution
|
||||
**1. Three standards of real demand:**
|
||||
- Users are willing to pay for it (the most important standard)
|
||||
- Users are willing to change behavior for it
|
||||
- If no solution exists, users suffer clear loss
|
||||
|
||||
**3. Avoid fake-demand traps**
|
||||
- Solving itch points instead of pain points
|
||||
- Market too small to support the business model
|
||||
- Solution complexity higher than problem value
|
||||
**2. Path from ordinary idea to product people will pay for:**
|
||||
- <strong>Horizontal segmentation:</strong> find a specific user group; the narrower the segment, the stronger the willingness to pay
|
||||
- <strong>Vertical deep dive:</strong> understand complete scenarios, solving emotions rather than only functions
|
||||
- <strong>Value reconstruction:</strong> evolve from tools into solutions and build reasons to pay
|
||||
|
||||
**4. Validate payment intention early**
|
||||
**3. Avoid fake-demand traps:**
|
||||
- Solving pseudo pain points (itch points instead of pain points)
|
||||
- Market size is too small to support a business model
|
||||
- Solution is more complex than the problem itself
|
||||
|
||||
**4. How to validate willingness to pay:**
|
||||
- Interview 10 target users in depth
|
||||
- Use prepaid deposits to test real willingness
|
||||
- >10% deposit conversion is a practical signal to continue
|
||||
- Ask users to prepay deposits to verify true willingness
|
||||
- Only when deposit-paying ratio > 10% is it worth investing
|
||||
|
||||
**5. Use AI dialogue for refinement**
|
||||
**5. Refine product concepts with AI dialogue:**
|
||||
- Iterate through multiple rounds
|
||||
- Stay execution-focused
|
||||
- Set measurable validation metrics and adjust quickly
|
||||
- Focus on execution and action plans
|
||||
- Set validation metrics and adjust direction promptly
|
||||
|
||||
:::
|
||||
|
||||
**Remember:** strong product managers do not invent demand from thin air. They discover real needs that are ignored, underestimated, or poorly served, then build products users are willing to pay for.
|
||||
**Remember:** good product managers do not create demand from thin air. They discover real needs that are <strong>ignored, underestimated, or poorly satisfied</strong>, then find ways to make users willing to pay.
|
||||
|
||||
In the next chapter, we will take our validated ideas and start learning how to use an AI IDE to turn them into interactive product prototypes.
|
||||
|
||||
@@ -1231,139 +1231,3 @@ If you encounter new words in the learning process later, you can also have AI a
|
||||
- Then write a sentence about "what you can imagine it as";
|
||||
- Finally give a particularly simple small example.
|
||||
This way your "personal glossary" will grow longer and more practical, gradually enabling better communication with computers.
|
||||
|
||||
---
|
||||
|
||||
# Appendix 2: Visual Studio Code Menu Bar Analysis
|
||||
|
||||
To help everyone understand the meaning of each option, here we provide an in-depth analysis of the menu bar:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
<details class="custom-block details" id="vscode-file-menu">
|
||||
<summary>File: Project and File Open/Save/Workspace Management</summary>
|
||||
|
||||
This menu is mainly responsible for: **Creating/Opening Files**, **Opening Project Folders**, **Managing Workspaces**, **Saving and Closing**.
|
||||
|
||||
> The most commonly used are: Open Folder to open a project; Open… to open a single file; then use Save / Save All to save changes, and finally use Close Editor / Close Folder to end the current work. Workspace-related features can be learned gradually as you take on more projects; there is no need to understand everything at once.
|
||||
|
||||
- **New Text File**: Create a new unnamed text buffer for temporary notes or quick pasting.
|
||||
- **New File…**: Create a new file in the project (usually asks you to choose path/name).
|
||||
- **New Window**: Open a new VS Code window instance.
|
||||
- **New Window with Profile**: Open a new window with a specified Profile (extension/settings combination), suitable for isolating environments for different courses/projects.
|
||||
- **Open…**: Open a single file for editing.
|
||||
- **Open Folder…**: Open a folder as the project root directory (the most commonly used "open project" method).
|
||||
- **Open Workspace from File…**: Open a `.code-workspace` file to load a workspace with multiple folders/specific settings.
|
||||
- **Open Recent**: Quickly access recently opened files/folders/workspaces.
|
||||
- **Add Folder to Workspace…**: Add another folder to the current workspace (forming a multi-root workspace).
|
||||
- **Save Workspace As…**: Save the current workspace structure as a `.code-workspace` file for easy sharing/reuse.
|
||||
- **Duplicate Workspace**: Duplicate the current workspace configuration (commonly used to create similar project environments).
|
||||
- **Save**: Save changes to the current file.
|
||||
- **Save As…**: Save the current file with a new name/path.
|
||||
- **Save All**: Save all opened files that have modifications.
|
||||
- **Share**: Entry related to sharing/collaboration (specific content depends on version and extensions).
|
||||
- **Auto Save**: Toggle auto-save strategy (e.g., delayed save/focus change save).
|
||||
- **Revert File**: Discard unsaved changes to the current file and revert to the disk version.
|
||||
- **Close Editor**: Close the current tab.
|
||||
- **Close Folder**: Close the current project folder (workspace becomes empty).
|
||||
- **Close Window**: Close the current VS Code window.
|
||||
|
||||
</details>
|
||||
|
||||
<details class="custom-block details" id="vscode-edit-menu">
|
||||
<summary>Edit: Basic Editing, Find/Replace, Comments and Quick Edit Actions</summary>
|
||||
|
||||
This menu is mainly responsible for: **Undo/Redo**, **Cut/Copy/Paste**, **Find/Replace**, **Comments and Editor Actions** (improving editing efficiency).
|
||||
|
||||
- **Undo / Redo**: The most basic operations for reverting or reapplying a change when you make a mistake.
|
||||
- **Cut / Copy / Paste**: Move or duplicate text.
|
||||
- **Find / Replace**: Search or batch modify in the current file.
|
||||
- **Find in Files / Replace in Files**: Global (whole project) search and replace, very powerful but use with caution.
|
||||
- **Toggle Line Comment**: `Ctrl + /`, quickly comment/uncomment the current line.
|
||||
- **Toggle Block Comment**: `Shift + Alt + A`, quickly comment/uncomment the selected area.
|
||||
- **Emmet: Expand Abbreviation**: A powerful tool for HTML/CSS development, type shorthand and press Tab to expand code.
|
||||
|
||||
</details>
|
||||
|
||||
<details class="custom-block details" id="vscode-selection-menu">
|
||||
<summary>Selection: Multi-cursor and Smart Selection</summary>
|
||||
|
||||
This menu is mainly responsible for: **Cursor Control**, **Multi-line Editing**, **Expand/Shrink Selection**. This is VS Code's killer feature for improving efficiency.
|
||||
|
||||
- **Select All**: Select all content in the current file.
|
||||
- **Expand Selection / Shrink Selection**: Intelligently perceive syntax structure, gradually expand or shrink the selection range (e.g., word -> string -> inside parentheses -> whole line -> function body).
|
||||
- **Copy Line Up / Down**: Quickly clone the current line.
|
||||
- **Move Line Up / Down**: `Alt + ↑ / ↓`, adjust code line order directly without cut and paste.
|
||||
- **Add Cursor Above / Below**: `Ctrl + Alt + ↑ / ↓`, enable multi-cursor mode to edit multiple lines simultaneously.
|
||||
- **Add Cursor to Line Ends**: After selecting multiple lines of text, add a cursor at the end of each line.
|
||||
|
||||
</details>
|
||||
|
||||
<details class="custom-block details" id="vscode-view-menu">
|
||||
<summary>View: Interface Layout and Panel Control</summary>
|
||||
|
||||
This menu is mainly responsible for: **Toggle Sidebar/Panel**, **Adjust Layout**, **Command Palette**, **Output and Debug Console**.
|
||||
|
||||
- **Command Palette…**: `Ctrl + Shift + P` / `F1`, VS Code's central command center, can search and execute all commands.
|
||||
- **Open View…**: Quickly open specific sidebar views (such as Explorer, Source Control).
|
||||
- **Appearance**: Control fullscreen, menu bar visibility, sidebar position, zoom level (Zoom In/Out).
|
||||
- **Editor Layout**: Split editor (Split Up/Down/Left/Right) for side-by-side code comparison.
|
||||
- **Explorer / Search / Source Control / Run / Extensions**: Directly switch views in the Activity Bar.
|
||||
- **Problems / Output / Debug Console / Terminal**: Directly control the display content of the bottom panel.
|
||||
- **Word Wrap**: `Alt + Z`, control whether long lines of code automatically wrap (does not affect actual file content).
|
||||
|
||||
</details>
|
||||
|
||||
<details class="custom-block details" id="vscode-go-menu">
|
||||
<summary>Go: Code Navigation and Jumping</summary>
|
||||
|
||||
This menu is mainly responsible for: **Jumping Between Files**, **Jumping Between Symbols (Functions/Variables)**.
|
||||
|
||||
- **Back / Forward**: Like a browser, jump between your cursor history positions.
|
||||
- **Switch Editor…**: Quickly switch between opened tabs.
|
||||
- **Go to File…**: `Ctrl + P`, type filename to quickly open files.
|
||||
- **Go to Symbol in Editor…**: `Ctrl + Shift + O`, list functions/classes/variables in the current file for quick jumping.
|
||||
- **Go to Definition**: `F12`, jump to the definition of the variable or function at the cursor.
|
||||
- **Go to References**: `Shift + F12`, see where this variable or function is used.
|
||||
- **Go to Line/Column…**: `Ctrl + G`, jump to a specified line number.
|
||||
|
||||
</details>
|
||||
|
||||
<details class="custom-block details" id="vscode-run-menu">
|
||||
<summary>Run: Debugging and Execution</summary>
|
||||
|
||||
This menu is mainly responsible for: **Start Debugging**, **Breakpoint Management**.
|
||||
|
||||
- **Start Debugging**: `F5`, run the program in debug mode (supports breakpoints, variable watching).
|
||||
- **Run Without Debugging**: `Ctrl + F5`, run the program directly without attaching a debugger (slightly faster).
|
||||
- **Stop Debugging**: Forcefully end the current debugging session.
|
||||
- **Restart Debugging**: Stop the current debugging session and start it again.
|
||||
- **Toggle Breakpoint**: `F9`, add or remove a red dot (breakpoint) on the current line.
|
||||
- **New Breakpoint**: Supports conditional breakpoints, log breakpoints, and other advanced features.
|
||||
|
||||
</details>
|
||||
|
||||
<details class="custom-block details" id="vscode-terminal-menu">
|
||||
<summary>Terminal: Integrated Command Line</summary>
|
||||
|
||||
This menu is mainly responsible for: **New Terminal**, **Manage Terminal Windows**.
|
||||
|
||||
- **New Terminal**: Open a new Shell (PowerShell/Bash/Zsh) in the bottom panel.
|
||||
- **Split Terminal**: Split left/right/up/down in the same terminal panel to run multiple commands simultaneously.
|
||||
- **Run Task…**: Run build/test tasks defined in `tasks.json`.
|
||||
|
||||
</details>
|
||||
|
||||
<details class="custom-block details" id="vscode-help-menu">
|
||||
<summary>Help: Documentation and Feedback</summary>
|
||||
|
||||
- **Welcome**: Open the welcome page (contains getting started guide, recent projects).
|
||||
- **Show All Commands**: Same as Command Palette.
|
||||
- **Documentation**: Jump to official documentation.
|
||||
- **Editor Playground**: Interactive tutorial for learning editing techniques.
|
||||
- **Check for Updates…**: Manually check for updates.
|
||||
- **About**: View version number, build time, Electron/Node version information.
|
||||
|
||||
</details>
|
||||
|
||||
@@ -1,537 +1,621 @@
|
||||
---
|
||||
title: 'Building Prototypes - From Business Analysis to Multi-page Product Prototype Implementation'
|
||||
description: 'Experience the complete loop from business analysis to multi-page product prototype implementation. Learn how to ask business questions, break down requirements, use AI IDE to generate single-page and multi-page applications, and beautify and test prototypes.'
|
||||
title: 'Build a Prototype Hands-On - From Business Analysis to Multi-Page Product Prototype Implementation'
|
||||
description: 'Experience the complete loop from business analysis to multi-page product prototype implementation. Learn how to ask business questions, break down requirements, use an AI IDE to generate single-page and multi-page apps, and polish and test prototypes.'
|
||||
---
|
||||
|
||||
<script setup>
|
||||
import { relatedArticlesMap } from '@theme/data/relatedArticles'
|
||||
|
||||
const duration = 'About <strong>8 hours</strong>'
|
||||
const relatedArticles =
|
||||
relatedArticlesMap['en/stage-1/1.2-building-prototype'] ?? []
|
||||
</script>
|
||||
|
||||
# Beginner Level 3: Building Prototypes
|
||||
# Beginner 3: Build a Prototype Hands-On
|
||||
|
||||
## Chapter Overview
|
||||
## Chapter Introduction
|
||||
|
||||
<ChapterIntroduction :duration="duration" :tags="['Business Analysis', 'Prototype Design', 'AI-Assisted Programming', 'Multi-page Applications']" coreOutput="1 e-commerce material workbench prototype" expectedOutput="Interactive Web prototype">
|
||||
<ChapterIntroduction :duration="duration" :tags="['Business Analysis', 'Prototype Design', 'AI-Assisted Coding', 'Multi-Page Applications']" coreOutput="1 E-commerce Asset Workbench Prototype" expectedOutput="An Interactive Web Prototype">
|
||||
|
||||
In the previous chapter, we learned how to <strong>find great ideas</strong> — starting from user needs to find directions people are willing to pay for. But finding direction is just the first step. <strong>What really tests a product manager is: how to turn vague needs into usable products.</strong>
|
||||
In the previous chapter, we learned how to <strong>find a great idea</strong> - starting from user needs and finding directions people are willing to pay for. But finding direction is only step one. <strong>What really tests a product manager is: how to turn vague requirements into a usable product.</strong>
|
||||
|
||||
This chapter solves a <strong>real problem</strong>: Your boss gives you a vague but high-pressure task: "Use AI to improve the efficiency of publishing products to e-commerce platforms" — how do you turn this into a <strong>usable product prototype</strong>?
|
||||
In this chapter, we solve one <strong>real-world problem</strong>: your boss throws one sentence at you, "Use AI to improve the efficiency of publishing products to e-commerce platforms." How do you turn that into a <strong>usable product prototype</strong>?
|
||||
|
||||
Unlike the Snake game or calculator we built before, <strong>real business can't just rely on imagined features</strong>:
|
||||
Unlike building Snake or a calculator, <strong>real business work cannot rely on imagined features</strong>:
|
||||
|
||||
1. <strong>Clarify pain points</strong>: Talk to operations, dig out the <strong>real pain points</strong> from the vague "improve efficiency"
|
||||
2. <strong>Prioritize</strong>: Among many problems, solve the <strong>most painful one</strong> first, don't try to do everything at once
|
||||
3. <strong>Quick validation</strong>: Use AI IDE to build a <strong>single-page prototype</strong> first, then expand to multi-page after it works
|
||||
4. <strong>Make something usable</strong>: Finally deliver an <strong>e-commerce material workbench that can be demonstrated and operated</strong>
|
||||
1. <strong>Clarify pain points</strong>: talk to operations and dig out the <strong>real pain points</strong> hidden behind the vague phrase "improve efficiency"
|
||||
2. <strong>Prioritize</strong>: among many problems, solve the <strong>most painful one</strong> first, instead of trying to do everything at once
|
||||
3. <strong>Validate quickly</strong>: use an AI IDE to build a <strong>single-page prototype</strong> first; once it works, expand to multiple pages
|
||||
4. <strong>Deliver something usable</strong>: finally deliver an <strong>e-commerce asset workbench that can be demonstrated and operated</strong>
|
||||
|
||||
We'll learn the transition from <strong>making toys to making applications</strong>, and learn to <strong>empathize and think about customers' real needs</strong>.
|
||||
We will learn the shift from <strong>building toys to building applications</strong>, and learn how to <strong>empathize and think from real customer needs</strong>.
|
||||
|
||||
</ChapterIntroduction>
|
||||
|
||||
::: info Note
|
||||
This chapter may contain some business terminology. If you don't understand something, you can ask AI for an explanation.
|
||||
This chapter contains some business terms. If you do not understand one, ask AI for an explanation.
|
||||
:::
|
||||
|
||||
<div style="margin: 50px 0;">
|
||||
<ClientOnly>
|
||||
<StepBar :active="0" :items="[
|
||||
{ title: 'Requirements Analysis', description: 'From vague to specific' },
|
||||
{ title: 'Single Page Validation', description: 'Core functionality implementation' },
|
||||
{ title: 'Multi-page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Beautification', description: 'Improve user experience' }
|
||||
{ title: 'Requirement Analysis', description: 'From vague to concrete' },
|
||||
{ title: 'Single-Page Validation', description: 'Implement the core functionality' },
|
||||
{ title: 'Multi-Page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Polish and Refine', description: 'Improve user experience' }
|
||||
]" />
|
||||
</ClientOnly>
|
||||
</div>
|
||||
|
||||
## 1. Define Requirements Before Coding
|
||||
## 1. Define Requirements Before Writing Code
|
||||
|
||||
In previous tutorials, we used AI IDE to easily generate Snake and various mini-games, but these are just toy projects that can't be applied in work and life. If we want AI capabilities to truly serve everyone, we should combine vibe coding with real life and work scenarios.
|
||||
In earlier tutorials, we used AI IDE tools to quickly generate Snake and mini-games. But those are toy projects and are not directly useful in daily work and life. If we want AI capability to truly create value, we should combine vibe coding with real work and life scenarios.
|
||||
|
||||
In the last chapter, we learned how to find <strong>great ideas that people are willing to pay for</strong>, but finding direction is just the beginning. When actually building products, you'll discover: <strong>there's a huge gap between knowing "what to do" and knowing "how to do it."</strong>
|
||||
In the previous chapter, we learned how to find <strong>ideas people are willing to pay for</strong>, but finding direction is only the beginning. In real product work, you will realize: <strong>there is a huge gap between knowing "what to build" and knowing "how to build it."</strong>
|
||||
|
||||
This gap is <strong>the concretization of requirements</strong>.
|
||||
That gap is <strong>making requirements concrete</strong>.
|
||||
|
||||
For example, in classes or personal projects, we often start with the simplest executable features:
|
||||
For example, in class or personal projects, we often start from the simplest executable function:
|
||||
|
||||
- "Make a kanban board, list the tasks."
|
||||
- "Help me make a drawing tool."
|
||||
- "Help me make a software that can collect questionnaires."
|
||||
- "Build a board that lists tasks."
|
||||
- "Help me build a drawing tool."
|
||||
- "Help me build software to collect questionnaires."
|
||||
|
||||
These are often just a tool, a feature module, not even a clear business problem. More critically, <strong>these ideas are often just "you think it's useful," not "users really need it."</strong>
|
||||
These are often just tools or isolated feature modules, and sometimes not even a clearly defined business problem. More importantly, <strong>these ideas are often "I think this is useful," not "users truly need this."</strong>
|
||||
|
||||
In enterprise projects or startup projects, product managers and engineers often start from larger business propositions. For example, let's assume such a scenario:
|
||||
In enterprise projects or startup projects, product managers and engineers usually start from larger business goals. For example, assume this scenario:
|
||||
|
||||
<el-card shadow="hover" style="border-left: 5px solid #409EFF; background-color: #ecf5ff; margin: 20px 0;">
|
||||
<div style="font-weight: bold; color: #303133; margin-bottom: 10px;">Business Scenario:</div>
|
||||
<div style="font-weight: bold; color: #303133; margin-bottom: 10px;">🛍️ Business Scenario:</div>
|
||||
<div style="color: #606266; line-height: 1.6;">
|
||||
<p>You are an e-commerce operations product manager at a store. Your boss gave you a vague but high-pressure proposition:</p>
|
||||
<p style="font-style: italic; margin-top: 10px;">"Now everyone on WeChat is using AI to make images and copy, it looks pretty simple. Help me set this up so we can be more efficient when listing new products on Douyin e-commerce."</p>
|
||||
<p>You are an e-commerce operations product manager at a store. Your boss gives you a vague but high-pressure assignment:</p>
|
||||
<p style="font-style: italic; margin-top: 10px;">"Everyone on public channels is using AI to make images and copywriting, and it looks easy. Set this up for us so we can launch new products on Douyin e-commerce more efficiently."</p>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
At this point you might think: "Boss, you're dreaming again!" However, such vague one-sentence decisions are very common in actual work, even more frequent than your weekly bubble tea orders. Therefore, to be a qualified workplace worker (I'd rather you be the CEO of an emerging startup), we must learn how to transition from making tools for personal use to making real product prototypes.
|
||||
You might think, "Boss, you are dreaming again." In real work, though, this kind of one-sentence, vague directive is very common. To become a capable professional (or better, an early-stage startup CEO), we must learn how to move from building personal tools to building real product prototypes.
|
||||
|
||||
Since we've already learned to use an AI IDE, you might think this requirement is actually quite simple — just have AI write a prompt based on it, hand it to the Agent, and we're done, right?
|
||||
Since we have already learned how to use an AI IDE, you may think this requirement is easy: give AI a prompt and let the agent do everything:
|
||||
|
||||
```
|
||||
Please refer to my requirements below,
|
||||
Help me design an e-commerce material workbench,
|
||||
Including generation and management functions for product descriptions, images, videos, and other materials.
|
||||
```text
|
||||
Please refer to my requirement xxxx,
|
||||
help me design an e-commerce asset workbench,
|
||||
including generation and management of product descriptions, images, videos, and other assets.
|
||||
```
|
||||
|
||||
If you excitedly convert this requirement directly into a prototype and send it to your boss — congratulations, this quarter's bonus is cancelled!
|
||||
If you excitedly convert this straight into a prototype and send it to your boss - congratulations, your quarterly bonus may disappear.
|
||||
|
||||
**Why is this? This is the core pain point we need to solve:**
|
||||
**Why? This is exactly the core pain point we need to solve:**
|
||||
|
||||
Previously when learning AI IDE, we made toy projects for personal use like Snake and calculators — simple features, you know what you want, make it for yourself. But **real business scenarios are completely different**:
|
||||
Previously, when learning AI IDE tools, we mostly built **toy projects for ourselves** like Snake and calculators: simple features, clear personal goals, and "works for me" is enough. But **real business scenarios are completely different**:
|
||||
|
||||
- **You're not the user**: The boss wants "improved efficiency," but you don't know how operations works daily or where they're stuck;
|
||||
- **AI doesn't understand business either**: You throw a vague requirement to AI, it can only guess based on general knowledge. What it makes looks right but actually doesn't work;
|
||||
- **Good ideas ≠ good products**: You think "adding an AI generation feature" is cool, but users might not need it at all, or it's more troublesome than before.
|
||||
- **You are not the user**: the boss says "improve efficiency," but you do not know how operations actually works daily or where the bottleneck is.
|
||||
- **AI does not understand your business either**: if you give AI a vague requirement, it can only guess from generic knowledge. The result may look plausible but be unusable.
|
||||
- **A good idea is not the same as a good product**: you may think "add AI generation" is cool, but users may not need it, or it might create more friction.
|
||||
|
||||
**That's why we must learn "from thinking of ideas to understanding users."** Only when your creativity truly solves others' problems, and you ask questions and deeply understand the business, can you make something truly valuable. (Good ideas are even more important than good technology.)
|
||||
**That is why we must learn "from having an idea to understanding users."** Only when your idea truly solves someone else's problem, and you ask questions and deeply understand business context, can you produce real value. (A good idea can be even more important than good technology.)
|
||||
|
||||
### 1.1 From Imagination to Reality: Learn to Ask Business Questions
|
||||
### 1.1 From Imagination to Reality: Learn to Ask the Business
|
||||
|
||||
::: info First, let's clarify: What are requirements? What is business?
|
||||
::: info 💡 Clarify first: What is a requirement? What is business?
|
||||
|
||||
**Requirements** are what users really want, the troubles they encounter, the problems they want to solve. For example, "The boss wants me to list products faster" — this is a requirement.
|
||||
**A requirement** is what users truly want: the problem they encounter and want solved.
|
||||
For example, "my boss wants me to launch products faster" is a requirement.
|
||||
|
||||
**Business** is what users actually do every day, their way of working. For example, what e-commerce operations does daily: listing products, changing prices, making images, looking at data... these are all business.
|
||||
**Business** is what users actually do every day: their operational workflow.
|
||||
For example, daily e-commerce operations tasks include launching products, changing prices, making images, reviewing data, and more.
|
||||
|
||||
**Why care about business?**
|
||||
Because if you don't understand the business, the tools you make might "look good, but nobody uses them." Only by truly understanding how users work daily and where they're stuck can you make something that really helps them.
|
||||
**Why focus on business?**
|
||||
If you do not understand the business, you may build something that "looks good but nobody uses." Only when you understand users' daily workflow and bottlenecks can you build something truly helpful.
|
||||
|
||||
:::
|
||||
|
||||
From the simplest perspective, you can first ask yourself a few questions:
|
||||
From the simplest angle, ask yourself:
|
||||
|
||||
- The boss says "**improve efficiency a bit**" — what does that specifically mean? **Do it faster**? **Spend less money**? **Sell more goods**?
|
||||
- How are products currently listed? **Where is it not smooth**?
|
||||
- How many **new products** need to be done daily? How many **images** and how much **text** per product?
|
||||
- In current work, **which task is most troublesome**, **most unwanted**?
|
||||
- When the boss says "**improve efficiency**," what does that mean exactly? **Faster delivery**? **Lower cost**? **Higher sales**?
|
||||
- How are products launched now? **Where does the current process break down**?
|
||||
- How many **new products** are launched each day? How many **images** and how much **text** are needed per product?
|
||||
- Which tasks in the current workflow are the **most painful** and **most disliked**?
|
||||
|
||||
But these are all guessed questions. We need to ask the frontline Douyin e-commerce business people directly, "Where are your difficulties and concerns?" Get more accurate answers through communication:
|
||||
These are still assumptions. We need to ask frontline Douyin e-commerce practitioners directly: "Where are your actual difficulties, and what do you care about most?" This gives more accurate answers.
|
||||
|
||||
::: info Real Business Interview Results
|
||||
::: info 📋 Real business interview findings
|
||||
|
||||
We asked people doing e-commerce operations, and they mentioned these troubles:
|
||||
We asked e-commerce operators and heard:
|
||||
|
||||
**1. Too many things, too scattered**
|
||||
- One person manages several stores, each store has many products to handle;
|
||||
- Busy all day: **listing new products**, **changing prices**, **making images**, **looking at data** — one thing not finished before another starts.
|
||||
**1. Too much, too fragmented**
|
||||
- One person handles multiple stores, each with many products
|
||||
- Daily work keeps switching between **launching products**, **changing prices**, **creating images**, and **checking data**
|
||||
|
||||
**2. Content creation isn't done once, but iteratively**
|
||||
- First use **manufacturer-provided images**, **previously used materials** or **reference images found online**, quickly **list** products to test;
|
||||
- Spend a little money on promotion, **see if anyone buys**;
|
||||
- Only for **products that sell well** will they seriously make images, write details, shoot videos.
|
||||
**2. Content is iterative, not one-shot**
|
||||
- First use **vendor-provided images**, **historical assets**, or **reference screenshots** to quickly launch
|
||||
- Spend a small budget to test and **see if sales happen**
|
||||
- Only for **products that perform well** do they invest deeply in image design, detail pages, and video
|
||||
|
||||
:::
|
||||
After interviewing the business side, we feel passionate because now we can truly make a product prototype that perfectly fits the business! — Wrong again. If we try to "satisfy all demands at once," the product will be very large and hard to implement within the course timeframe. Therefore, we need to further organize and converge, finding the real core pain points.
|
||||
|
||||
### 1.2 From Divergence to Convergence: Lock in Core Business Pain Points and Features
|
||||
After interviewing the business side, we might feel, "Now we can build the perfect prototype." Still wrong. If we try to satisfy everything at once, the product becomes huge and impossible to deliver within the course timeframe. We still need to narrow down and prioritize the core pain points.
|
||||
|
||||
::: info Why "converge"? What is a "pain point"?
|
||||
### 1.2 From Divergence to Convergence: Lock the Core Pain Point and Features
|
||||
|
||||
**Many problems, but which one to do first?**
|
||||
::: info 💡 Why "convergence"? What is a "pain point"?
|
||||
|
||||
Users might tell you a bunch of problems: A is troublesome, B is troublesome, C is troublesome... But if you try to solve all problems at once, you might end up doing nothing well. So you need to **converge** — from a pile of problems, pick the **most painful, most urgent, most solvable** one to start with.
|
||||
**There are many problems. Which one do we solve first?**
|
||||
|
||||
Users can list many issues: A hurts, B hurts, C hurts. If we try to solve all of them at once, we may solve none well. So we must **converge**: pick the **most painful, most urgent, and most solvable** problem first.
|
||||
|
||||
**What is a pain point?**
|
||||
It's the specific problem users **find most annoying, most time-consuming, most want to solve**. Not "I think it's useful," but what users **complain about every day, find painful every time they do it**.
|
||||
It is the concrete problem users find **most frustrating, most time-consuming, and most urgent to fix**. Not "I think this is useful," but what users complain about repeatedly in real work.
|
||||
|
||||
:::
|
||||
|
||||
Through the interview above, we found operations has many problems: interrupted rhythm by activities, managing multiple stores, busy going back and forth between listing/pricing/images/data...
|
||||
From interviews, we found many issues: activity-driven interruptions, multi-store management pressure, frequent context-switching between launch/pricing/creative/data tasks.
|
||||
|
||||
If we try to "solve all these problems," we'll end up with a **comprehensive but hard-to-use tool**.
|
||||
If we attempt "solve all of it," we will end up with a **big but unusable** tool.
|
||||
|
||||
Let's categorize these problems (you can have AI help), roughly three types:
|
||||
With AI help, we can classify the issues into three groups:
|
||||
|
||||
1. **Rhythm problems**: When to list, when to adjust prices;
|
||||
2. **Efficiency problems**: How to manage multiple stores and products simultaneously;
|
||||
3. **Content problems**: How to quickly create product images and copy.
|
||||
1. **Rhythm problems**: when to launch, when to adjust price
|
||||
2. **Efficiency problems**: how to manage many stores/products in parallel
|
||||
3. **Content problems**: how to quickly produce product images and copy
|
||||
|
||||
For our course, the most suitable to solve first is **the 3rd type: content creation problems**. But "quickly create content" is still a bit abstract. Let's ask the business side specifically where they're stuck:
|
||||
For this course, the best first target is **Group 3: content creation**. But "make content quickly" is still broad, so we ask where exactly they get stuck:
|
||||
|
||||
::: info Business Side Says: Two Most Painful Parts of Content Creation
|
||||
::: info 📋 The business side says content has two biggest pain points
|
||||
|
||||
**Pain 1: Batch creating images and copy is too much effort**
|
||||
- Materials scattered everywhere: cloud drives, WeChat records, platform backends... **finding them is a hassle**;
|
||||
- Need to list many products at once, **no time to carefully craft each one**, can only throw something together;
|
||||
- Requirements aren't high, **presentable and listable is fine**, doesn't need to be fancy.
|
||||
**Pain Point 1: Batch image/copy production is exhausting**
|
||||
- Assets are scattered (cloud drives, chat history, backend), and **hard to find**
|
||||
- Many products need launching at once, so there is **no time for per-item perfection**
|
||||
- The standard is practical: **good enough to launch**, not perfect design
|
||||
|
||||
**Pain 2: Good solutions can't be saved for reuse**
|
||||
- Previously made good titles and layouts, **can't find them next time**;
|
||||
- Solutions scattered in chat history, old product links;
|
||||
- When needed, have to **dig through everything, copy-paste and edit for ages**;
|
||||
- Lacking a tool that can **collect, manage, and directly apply**.
|
||||
**Pain Point 2: Good approaches are not reusable**
|
||||
- Previously successful titles/layouts are **hard to find next time**
|
||||
- Useful approaches are scattered in chat records and old product links
|
||||
- Reuse requires **manual searching + copy/paste + heavy editing**
|
||||
- Missing a tool to **save, manage, and apply templates directly**
|
||||
|
||||
:::
|
||||
|
||||
Based on these two pain points, we want to make a simple little tool: **Help operations batch create images and copy, and save good solutions for direct reuse next time**.
|
||||
Based on these two pain points, we define a simple tool: **help operations batch-generate image and copy drafts, and save good patterns for direct reuse next time**.
|
||||
|
||||
It only does two things (you can have AI help refine, remember to keep deleting features based on business feedback):
|
||||
The tool only focuses on two capabilities (and you can keep cutting features with AI support as business feedback arrives):
|
||||
|
||||
::: info Feature 1: Batch Generate E-commerce Product Images and Copy
|
||||
::: info Feature 1: Batch generate e-commerce product images and copy
|
||||
|
||||
**What does this do?**
|
||||
Give the system some product information, and it automatically generates product images and text that can be used for listing on e-commerce platforms (like Douyin, Taobao).
|
||||
**What does it do?**
|
||||
Given product information, the system auto-generates product images and text that can be used on platforms like Douyin and Taobao.
|
||||
|
||||
**Input**
|
||||
| Type | Content |
|
||||
|------|------|
|
||||
| Product Information | Name, category, brand, material, size, color, etc. |
|
||||
| Product Images | White background or simple scene images |
|
||||
| Reference Images | Screenshots of previously best-selling products or reference links |
|
||||
| Import Method | Batch import via Excel, or fill in directly on the page |
|
||||
| Product data | Name, category, brand, material, size, color, target users, etc. |
|
||||
| Product images | White background image or simple scene image |
|
||||
| Reference assets | Screenshots/links of previously successful products |
|
||||
| Import method | Excel batch import or direct form input/upload |
|
||||
|
||||
**Output (Generated E-commerce Materials)**
|
||||
- **Product Main Image**: Product display image with text selling points (first image users see when scrolling)
|
||||
- **Product Title**: Keyword combination that can be searched
|
||||
- **Selling Point Copy**: 1-2 sentences to attract buyers
|
||||
- All are **finished products that can be listed with minor edits**
|
||||
**Output (generated listing assets)**
|
||||
- **Main product image**: a presentable image draft with core selling points
|
||||
- **Product title**: keyword-structured title fit for search
|
||||
- **Selling-point copy**: 1-2 sentences that attract buyers
|
||||
- All outputs should be **launch-ready or editable with light changes**
|
||||
|
||||
**Effect**
|
||||
- Before: Every product had to start from scratch making images and writing copy
|
||||
- After: Throw a batch of products into the system, generate drafts, then pick and edit
|
||||
**Workflow impact**
|
||||
- Before: start each product's creative work from scratch
|
||||
- After: submit a batch, get drafts, then filter and fine-tune
|
||||
|
||||
:::
|
||||
|
||||
::: info Feature 2: Save Good Solutions as Templates
|
||||
::: info Feature 2: Save effective output as reusable templates
|
||||
|
||||
**Input**
|
||||
| Type | Content |
|
||||
|------|------|
|
||||
| Complete Set | Main image + Title + Copy |
|
||||
| A complete set | Main image + title + selling-point copy |
|
||||
|
||||
**Output**
|
||||
| Function | Description |
|
||||
|------|------|
|
||||
| Apply | Use template to auto-generate for new products |
|
||||
| Edit | Directly modify title, modify copy |
|
||||
| Manage | Name, tag (like "men's bag template", "promotion title"), easy to find |
|
||||
| Apply | Reuse a saved template for new product generation |
|
||||
| Edit | Directly edit title or copy |
|
||||
| Manage | Name and tag templates (for example "men's bag template", "campaign title"), searchable later |
|
||||
|
||||
**Effect**
|
||||
1. Import new product
|
||||
2. Choose: Let system generate by default, or **use my saved template**
|
||||
3. System automatically applies template style, outputs new images and copy
|
||||
**Workflow impact**
|
||||
1. Import a new product
|
||||
2. Choose default generation or **apply a saved template**
|
||||
3. System applies template style and outputs a new image + copy draft
|
||||
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
**Review what we just did:**
|
||||
**What did we just do?**
|
||||
|
||||
1. **Asked questions first**: Didn't start building directly, but first asked operations "what annoys you most";
|
||||
2. **Found pain points**: Discovered their most painful parts are "making images and copy is too much effort" and "good solutions can't be saved";
|
||||
3. **Converged scope**: Not making a comprehensive platform, just these two features: "batch generate images and copy + save templates".
|
||||
1. **Asked first**: not coding immediately, but asking operators what hurts most
|
||||
2. **Found core pain**: "image/copy creation is too labor-intensive" and "good patterns cannot be reused"
|
||||
3. **Converged scope**: not building a huge platform; only two core features first
|
||||
|
||||
**Why is this important?**
|
||||
**Why this matters**
|
||||
|
||||
A common beginner misconception about product building is that more features are always better. But what users really need is **a solution to their most painful problem**. Building a pile of features that don't work well is worse than building one or two features that really help users.
|
||||
A beginner trap is "more features = better." In reality, users need you to solve the **single most painful problem** first. Many weak features are less valuable than a few features that truly work.
|
||||
|
||||
**Core of Product and Business Thinking:**
|
||||
- Don't decide on your own: "I think users need X"
|
||||
- Ask users "What do you do every day? Where is it most painful?"
|
||||
- From a pile of problems **converge** to the most painful, most solvable one
|
||||
- First make a **minimum viable** version, then slowly iterate
|
||||
**Core product/business thinking**
|
||||
- Do not decide from your assumptions
|
||||
- Ask users what they do daily and where it hurts most
|
||||
- Converge toward the most painful and solvable point
|
||||
- Build a **minimum usable version** first, then iterate
|
||||
|
||||
This is what we need to figure out before writing code. Code is just a tool; **understanding users and finding the right problem** is the first step.
|
||||
This is what must be clear before coding. Code is just a tool; **understanding users and locking the right problem** is step one.
|
||||
|
||||
<div style="margin: 50px 0;">
|
||||
<ClientOnly>
|
||||
<StepBar :active="1" :items="[
|
||||
{ title: 'Requirements Analysis', description: 'From vague to specific' },
|
||||
{ title: 'Single Page Validation', description: 'Core functionality implementation' },
|
||||
{ title: 'Multi-page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Beautification', description: 'Improve user experience' }
|
||||
{ title: 'Requirement Analysis', description: 'From vague to concrete' },
|
||||
{ title: 'Single-Page Validation', description: 'Implement the core gameplay' },
|
||||
{ title: 'Multi-Page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Polish and Refine', description: 'Improve user experience' }
|
||||
]" />
|
||||
</ClientOnly>
|
||||
</div>
|
||||
|
||||
## 2. Generate Prototype in 10 Minutes: Let AI IDE Implement "Core Gameplay"
|
||||
## 2. Build a Prototype in 10 Minutes: Let AI IDE Implement the Core Gameplay
|
||||
|
||||
::: info Programming Plan Suggestion
|
||||
If you feel the current IDE isn't smart enough, or you run out of quota quickly, you can buy a **coding plan**. For a preview, see [this article](../../stage-2/backend/2.6-modern-cli/) on programming with Claude.
|
||||
::: info 💡 Coding plan suggestion
|
||||
If your current IDE doesn't feel smart enough, or you run out of quota quickly, consider a dedicated **coding plan**. For a preview, see [this article](../../stage-2/backend/2.6-modern-cli/) on using Claude for coding.
|
||||
:::
|
||||
|
||||
Thinking is good, but don't overthink. Let's rein in the deliberation and try building a prototype, starting from a single page.
|
||||
Thinking is good, but avoid overthinking. Let's start from one page and build a prototype first.
|
||||
|
||||
### 2.1 First Step: Tell AI What You Want in Plain Language
|
||||
### 2.1 Step 1: Tell AI What You Want in Plain Language
|
||||
|
||||
When starting out, don't pursue perfect prompts. Begin with your most natural expression. Just like describing requirements to a colleague, tell AI in plain language what you want to do, then let AI help you optimize it into a more professional expression.
|
||||
At the beginning, do not chase a perfect prompt. Start with your natural description. Explain your goal to AI as if talking to a teammate, then let AI help refine it into clearer language.
|
||||
|
||||
#### 2.1.1 Start from Verbal Description (Recommended for Beginners)
|
||||
#### 2.1.1 Start with spoken-style description (recommended for beginners)
|
||||
|
||||
First describe your idea in your own words, even if it's rough, that's fine:
|
||||
Describe your idea in your own words. Rough is fine:
|
||||
|
||||
```
|
||||
I want to make a tool that helps e-commerce operations automatically generate product main images and copy.
|
||||
Operations usually have to manually make images and write copy one by one, which is very troublesome.
|
||||
My idea is: they upload product information, the system automatically generates a batch of drafts,
|
||||
operations pick the good ones and make minor edits before using.
|
||||
```text
|
||||
I want to build a tool that helps e-commerce operators automatically generate product main images and copy.
|
||||
Operators currently make images and copy one by one manually, which is painful.
|
||||
My idea: they upload product info, and the system generates a batch of drafts.
|
||||
Operators pick useful ones and make light edits.
|
||||
|
||||
First make the simplest version: one page, fill in product info on the left,
|
||||
display generated results on the right. Can upload images, can fill in text,
|
||||
after generation show main image preview and copy.
|
||||
Start with the simplest version: one page. Input area on the left,
|
||||
generated results on the right. Support image upload and text fields.
|
||||
After generation, show main image preview and copy.
|
||||
```
|
||||
|
||||
Next, send this text to AI (like ChatGPT, Claude, etc.) and let it help you expand. AI usually helps you add details you didn't consider, organizes your ideas more clearly, and finally generates a prompt suitable for sending to AI IDE.
|
||||
Then send this to AI (ChatGPT, Claude, etc.) and ask it to expand and structure it. AI often adds details you might miss and produces a better prompt for your AI IDE.
|
||||
|
||||
You can say this to AI:
|
||||
```
|
||||
Help me expand the above idea, organize it into a clear business logic document,
|
||||
then generate a prompt suitable for sending to AI IDE (like Cursor, Trae),
|
||||
for generating single-page application prototype code.
|
||||
You can ask like this:
|
||||
|
||||
```text
|
||||
Please expand the idea above into a clear business-logic document,
|
||||
then generate a prompt suitable for an AI IDE (for example Cursor or Trae)
|
||||
to generate a single-page prototype application.
|
||||
```
|
||||
|
||||
AI will return a structured requirement and corresponding prompt. You check it yourself, delete unnecessary features, and after confirming it's correct, use it to generate code.
|
||||
AI will return a structured requirement and prompt. Review it, remove unnecessary features, confirm it, then use it for code generation.
|
||||
|
||||
The benefit of doing this: verbal descriptions are the most authentic ideas, but might miss some important details. When AI helps you expand, it might ask "do you want to support batch upload?" — questions you didn't think of, helping you further validate. You can choose to keep or delete impractical features based on feedback, and through repeated modifications determine the first version prompt to give AI.
|
||||
Why this works: your spoken description captures your true intent, but may miss key details. AI expansion can surface questions like "do you need batch upload?" which helps validation. Keep refining by adding/removing features until your first working prompt is solid.
|
||||
|
||||
#### 2.1.2 Skip the Expansion Step: Directly Throw Your Organized Business Document to AI
|
||||
#### 2.1.2 Skip expansion: directly give AI your organized business doc
|
||||
|
||||
If you've already organized the business logic document in previous chapters (like a requirements description written in plain language), you can directly use the format below to send to AI IDE, skipping the intermediate step of having AI expand. Suitable when requirements are already clear and you want to start coding directly:
|
||||
If your business logic document is already prepared (for example from earlier chapters), you can directly feed it to the AI IDE using a structured format. This is suitable when requirements are already clear and you want to move fast.
|
||||
|
||||
```
|
||||
Help me implement a single-page application based on business logic, for validating core gameplay functionality.
|
||||
```text
|
||||
Please implement a single-page app based on the business logic below
|
||||
to validate the core gameplay.
|
||||
|
||||
Business logic reference:
|
||||
1. Help operations batch generate first version of image and text drafts:
|
||||
- **Input (supports direct upload and batch import of materials):**
|
||||
- Product basic info: name, category, brand, material, size, color, target audience, etc.;
|
||||
- Product images: white background / simple scene images;
|
||||
- Each generation supports uploading additional historical bestseller screenshots or reference links, allowing for reference materials;
|
||||
- Supports batch import via Excel, or online entry/upload on the page.
|
||||
- Supports specifying on the page whether to save product materials to the material library for next time use
|
||||
- **Output (content that can be directly listed or listed with minor edits):**
|
||||
- Each product gets one "presentable, containing basic selling points" main image draft;
|
||||
- One "reasonably structured, containing core keywords" title + 1-2 sentences of selling point copy.
|
||||
- **Expected usage change:**
|
||||
From starting from scratch for each batch of products to throwing a batch of products into the system, taking the system-generated drafts for filtering and minor adjustments.
|
||||
Business logic:
|
||||
1. Help operations batch-generate first-round image+copy drafts:
|
||||
- **Input (support direct upload and batch import):**
|
||||
- Product fields: name, category, brand, material, size, color, target users, etc.
|
||||
- Product image: white background image / simple scene image
|
||||
- Per generation, support additional uploads of historical bestseller screenshots or reference links
|
||||
- Support Excel batch import or direct online input/upload
|
||||
- Support an option to save product assets to an asset library for later use
|
||||
- **Output (usable for listing with no or light edits):**
|
||||
- For each product, one "acceptable, basic-selling-point" main-image draft
|
||||
- One "well-structured, keyword-containing" title + 1-2 selling-point lines
|
||||
- **Expected workflow change:**
|
||||
Move from writing every product from scratch to dropping batches into the system and selecting/fine-tuning generated drafts.
|
||||
|
||||
First make the first feature, the second feature (template library) will be added later.
|
||||
First implement feature 1. Feature 2 (template library) can be added later.
|
||||
```
|
||||
|
||||
#### 2.1.3 Programmer's Approach (Advanced): Let AI Help You Write "Prompts for Prompts"
|
||||
#### 2.1.3 Advanced approach: let AI write a "prompt for your coding agent"
|
||||
|
||||
If you want more fine-grained control over the code generation process, you can first have AI (like ChatGPT) generate a prompt specifically for AI IDE based on your requirements:
|
||||
If you want finer control over code generation, ask AI to produce a coding-agent prompt first:
|
||||
|
||||
```
|
||||
Based on the idea below, help me write a prompt for a coding Agent,
|
||||
I need to use this prompt to generate code.
|
||||
```text
|
||||
Based on the idea below, write a coding-agent prompt for me.
|
||||
I will use it to generate code.
|
||||
|
||||
[Paste your business logic description here]
|
||||
[paste your business logic here]
|
||||
|
||||
Requirements:
|
||||
1. The prompt should include clear page layout descriptions
|
||||
2. Clarify data structures and interaction logic
|
||||
3. Specify tech stack (like React + Tailwind)
|
||||
4. List core functionality points to implement
|
||||
1. Include a clear page layout description
|
||||
2. Define data structures and interaction logic
|
||||
3. Specify the tech stack (for example React + Tailwind)
|
||||
4. List core features to implement
|
||||
```
|
||||
|
||||
Usually AI will generate a structured prompt like below:
|
||||

|
||||
AI will usually output a structured prompt similar to this:
|
||||

|
||||
|
||||
You can slightly modify this prompt, then send it to AI IDE to generate code.
|
||||
You can then make small edits and pass it into your AI IDE.
|
||||
|
||||
### 2.2 Second Step: Let AI IDE Directly Generate Code
|
||||
### 2.2 Step 2: Let AI IDE Generate the Code Directly
|
||||
|
||||
#### 2.2.1 Preparation: Understand AI IDE Basic Operations
|
||||
#### 2.2.1 Preparation: understand basic AI IDE operations
|
||||
|
||||
If you're not yet familiar with the basic usage of AI IDEs (like Cursor, Trae, Windsurf, etc.), it's recommended to first check the [IDE Basics Tutorial](/en/appendix/2-development-tools/ide-basics/) in the appendix to understand how to:
|
||||
- Create new projects
|
||||
- Chat with the AI Agent
|
||||
- Understand AI's code generation process
|
||||
If you are not yet familiar with AI IDEs (Cursor, Trae, Windsurf, etc.), read the appendix first: [IDE Basics](/en/appendix/2-development-tools/ide-basics/). Learn:
|
||||
|
||||
#### 2.2.2 Start Generating Code
|
||||
- how to create a new project
|
||||
- how to chat with an AI agent
|
||||
- how to understand AI-generated code flow
|
||||
|
||||
At this point you've obtained the initial prompt. Let's use the first prompt style as an example, letting AI help us generate code. First create a window and corresponding folder, open the folder (initialize a new project in your favorite folder location):
|
||||

|
||||

|
||||
#### 2.2.2 Start generating code
|
||||
|
||||
In the sidebar, select a model you like (recommend gemini, gpt, glm, kimi, minimax, etc.), enter the prompt obtained in the first step:
|
||||

|
||||
Now you already have the initial prompt. Using the first prompt style as an example, let AI help generate the project. Create/open a folder and initialize a new project:
|
||||
|
||||
After clicking generate, we'll see a familiar process. AI will plan the project's directory structure, necessary files, and give initial content for each file based on the prompt.
|
||||

|
||||

|
||||
|
||||
::: warning Special Note: AI Might Stop and Wait for Your Confirmation
|
||||
During generation, AI Agent often **stops to wait for your input or confirmation**, for example:
|
||||
- Asking if you want to continue to the next step
|
||||
- Having you press Enter to confirm an operation
|
||||
- Asking about your choice for a technical detail
|
||||
In the sidebar, choose a model you like (for example Gemini, GPT, GLM, Kimi, MiniMax), then paste the prompt from step one:
|
||||
|
||||
**If you see AI isn't moving, first check the dialogue interface to see if it's waiting for your reply.** Many beginners think AI is thinking, but it actually stopped waiting for you long ago. Reply actively or press Enter, and AI will continue working.
|
||||

|
||||
|
||||
After generation starts, AI will plan the folder structure, create needed files, and fill initial code.
|
||||
|
||||
::: warning ⚠️ Important: AI may pause and wait for your confirmation
|
||||
During generation, the AI agent often **stops and waits for your input**, for example:
|
||||
- asking whether to continue
|
||||
- asking you to press Enter to confirm
|
||||
- asking for a technical choice
|
||||
|
||||
**If AI appears idle, first check the chat panel to see whether it is waiting for you.**
|
||||
Many beginners think AI is "thinking," but it is actually paused for input.
|
||||
:::
|
||||
|
||||
At this point, don't forget to press Enter to confirm information (otherwise it will be stuck waiting; some AI IDEs don't have this issue):
|
||||

|
||||
Do not forget to press Enter for confirmation where needed (some IDEs behave differently):
|
||||
|
||||
If you encounter the following scenario, this means a service has already started locally. You need to click skip, otherwise it will stay on this interface (if nothing appears after code generation finishes, you need to actively say "help me start this project"):
|
||||

|
||||

|
||||
|
||||
::: info Scenario Explanation
|
||||
**Scenario Explanation**: You used `npm create vite@latest` to create a React + TypeScript project (easy-vibe-web). After creation, the computer will automatically "run" this webpage, making it convenient for you to see the effect immediately.
|
||||
If you encounter the screen below, it usually means the local service has already started. Click skip if needed, otherwise you may stay stuck there. (If generation is done but no preview appears, ask AI directly: "Please start this project.")
|
||||
|
||||

|
||||
|
||||
::: info 💡 Scenario explanation
|
||||
**Scenario**: you used `npm create vite@latest` to initialize a React + TypeScript project (`easy-vibe-web`). After creation, your computer starts a local web service so you can preview immediately.
|
||||
|
||||
**Local service**: a temporary web service running only on your own machine.
|
||||
|
||||
**localhost**: means "this machine itself."
|
||||
|
||||
**Port**: an ID for distinguishing multiple services on the same machine (this project uses port 5174).
|
||||
|
||||
**Link `http://localhost:5174/`**: open this in browser to view the running project.
|
||||
|
||||
**Why 5174?** 5173 may already be occupied, so Vite auto-switched to 5174. This is normal.
|
||||
|
||||
**Local Service**: Can be understood as your computer temporarily opening a webpage display window, running only on your own computer, others can't access it.
|
||||
:::
|
||||
|
||||
---
|
||||
After confirmation, wait briefly, and you should see the initial result:
|
||||
|
||||
**🎉 Congratulations! You've completed the first version of your prototype!**
|
||||

|
||||
|
||||
Now you can see the running effect in the browser. Next, we'll expand based on this foundation.
|
||||
The base function appears, but UI is rough. Now talk to AI directly to improve visual quality:
|
||||
|
||||

|
||||
|
||||
After refinement, you can get a cleaner interface:
|
||||
|
||||

|
||||
|
||||
Then keep iterating by need, for example:
|
||||
|
||||
- "I do not need batch import now. Remove it."
|
||||
- "The left-side form has too many fields. Keep only xxxx."
|
||||
|
||||
You can even ask AI to reference established websites by attaching screenshots:
|
||||
|
||||

|
||||
|
||||
Result example:
|
||||
|
||||

|
||||
|
||||
### 2.3 What to Do When Errors Happen
|
||||
|
||||
In real practice, errors are inevitable. This is normal and does not mean you failed. You do not need to fully understand every error at once; you only need to give AI the complete observed context.
|
||||
|
||||
Common handling patterns:
|
||||
|
||||
- **Case 1: page or terminal errors**
|
||||
If the page turns red, goes blank, or the terminal shows many red logs, take a screenshot or copy all error text and send it to AI.
|
||||
|
||||
- **Case 2: function is wrong but no error appears**
|
||||
For example, a button does nothing, data does not show, or styles break. Describe it in plain language: "what happened" + "what I expected." Add a screenshot if needed.
|
||||
|
||||
- **Case 3: unsure whether it is a problem**
|
||||
Ask AI directly: "Please check this feature for obvious issues and suggest whether adjustments are needed."
|
||||
|
||||
#### 2.3.1 Common beginner questions
|
||||
|
||||
- **Q: I do not know where the error is**
|
||||
- A: find all red text in terminal/console/page, copy all of it, and send to AI.
|
||||
|
||||
- **Q: AI fixed it, but the same error persists**
|
||||
- A: very common. Send the latest error output again and ask AI to continue fixing on top of previous changes.
|
||||
|
||||
- **Q: Do I need to fully understand the fix immediately**
|
||||
- A: no. Focus on one or two points each time. Understanding grows gradually like vocabulary learning.
|
||||
|
||||
- **Q: after many attempts, still broken**
|
||||
- A: try these:
|
||||
- use IDE version rollback in chat/history to return to a known working state
|
||||
- switch model or improve prompt specificity
|
||||
- package "current code + error logs + expected behavior" and ask AI to refactor that part as a whole
|
||||
|
||||
## 3. Expand from Single-Page to Multi-Page Application
|
||||
|
||||
<div style="margin: 50px 0;">
|
||||
<ClientOnly>
|
||||
<StepBar :active="2" :items="[
|
||||
{ title: 'Requirements Analysis', description: 'From vague to specific' },
|
||||
{ title: 'Single Page Validation', description: 'Core functionality implementation' },
|
||||
{ title: 'Multi-page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Beautification', description: 'Improve user experience' }
|
||||
{ title: 'Requirement Analysis', description: 'From vague to concrete' },
|
||||
{ title: 'Single-Page Validation', description: 'Implement the core gameplay' },
|
||||
{ title: 'Multi-Page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Polish and Refine', description: 'Improve user experience' }
|
||||
]" />
|
||||
</ClientOnly>
|
||||
</div>
|
||||
|
||||
## 3. Multi-page Expansion: From Single Function to Complete Application
|
||||
Once the core gameplay logic is roughly generated, we can continue building remaining pages. For example, many settings buttons may still do nothing.
|
||||
|
||||
The single-page prototype has validated the core gameplay. Now we need to expand it into a complete application.
|
||||
You can ask AI to inspect against your business requirements and generate missing parts, or directly ask AI to implement unfinished pages one by one until all page interactions work:
|
||||
|
||||
### 3.1 Analyze Current Prototype's Shortcomings
|
||||

|
||||
|
||||
Reviewing our single-page prototype, we'll find some obvious issues:
|
||||
After a short wait, you can see multiple pages and interactive features added on top of the previous base:
|
||||
|
||||
1. **No navigation**: Users can only see one page, can't switch between different functions
|
||||
2. **No data persistence**: Refresh the page and all data is gone
|
||||
3. **No error handling**: If something goes wrong, users don't know what happened
|
||||
4. **No user feedback**: No prompts after operations, users don't know if they succeeded
|
||||

|
||||

|
||||
|
||||
### 3.2 Design Multi-page Structure
|
||||
At this stage, manually click through the key flows you care about and confirm interactions. If something is not interactive, ask AI to fix it.
|
||||
|
||||
Based on the business requirements we analyzed earlier, we need the following pages:
|
||||
|
||||
1. **Homepage/Dashboard**: Display task list, quick actions
|
||||
2. **Product Management**: Add, edit, delete products
|
||||
3. **Generation Page**: Core functionality - generate images and copy
|
||||
4. **Template Library**: Save and manage templates
|
||||
5. **Settings**: User preferences, API configuration
|
||||
|
||||
### 3.3 Let AI IDE Help You Expand
|
||||
|
||||
You can tell AI IDE:
|
||||
|
||||
```
|
||||
Now I need to expand this single-page application into a multi-page application.
|
||||
Please help me:
|
||||
1. Add routing to support switching between pages
|
||||
2. Create the following pages:
|
||||
- Homepage: Display task list
|
||||
- Product Management: CRUD operations for products
|
||||
- Generation Page: The existing single-page functionality
|
||||
- Template Library: Save and manage templates
|
||||
3. Add navigation bar for page switching
|
||||
```
|
||||
|
||||
AI IDE will help you complete these expansions. You just need to confirm and adjust.
|
||||
## 4. Make the Prototype Feel Real
|
||||
|
||||
<div style="margin: 50px 0;">
|
||||
<ClientOnly>
|
||||
<StepBar :active="3" :items="[
|
||||
{ title: 'Requirements Analysis', description: 'From vague to specific' },
|
||||
{ title: 'Single Page Validation', description: 'Core functionality implementation' },
|
||||
{ title: 'Multi-page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Beautification', description: 'Improve user experience' }
|
||||
{ title: 'Requirement Analysis', description: 'From vague to concrete' },
|
||||
{ title: 'Single-Page Validation', description: 'Implement the core gameplay' },
|
||||
{ title: 'Multi-Page Expansion', description: 'Complete application structure' },
|
||||
{ title: 'Polish and Refine', description: 'Improve user experience' }
|
||||
]" />
|
||||
</ClientOnly>
|
||||
</div>
|
||||
|
||||
## 4. Beautification and Optimization: Make the Prototype More Professional
|
||||
After multi-page structure is in place, the final step is moving from "runs" to "feels smooth and professional." That means walking the entire user flow end to end and asking AI to fix any broken parts until you can refresh and run full flows from zero as a new user.
|
||||
|
||||
A working prototype is just the first step. To make it impressive, we need to beautify and optimize.
|
||||
Let's revisit the initial requirement:
|
||||
|
||||
### 4.1 UI Beautification
|
||||
```text
|
||||
1. Help operations batch-generate first-round image+copy drafts:
|
||||
- **Input (supports direct upload and batch import):**
|
||||
- Product basic data: name, category, brand, material, size, color, target audience, etc.
|
||||
- Product image: white background / simple scene image
|
||||
- Per generation, support extra upload of historical bestseller screenshots or reference links
|
||||
- Support Excel batch import or online entry/upload
|
||||
- Support a page option for saving product assets to asset library for future use
|
||||
- **Output (directly listable or listable with light edits):**
|
||||
- For each product, one "presentable image draft with basic selling points"
|
||||
- One "well-structured, keyword-rich title" + 1-2 selling-point lines
|
||||
- **Expected workflow change:**
|
||||
Move from creating every batch from scratch to dropping batches into the system, then filtering and fine-tuning generated drafts.
|
||||
|
||||
Tell AI IDE:
|
||||
|
||||
```
|
||||
Please help me beautify this application:
|
||||
1. Use a consistent color scheme
|
||||
2. Add appropriate spacing and alignment
|
||||
3. Improve button and form styles
|
||||
4. Add hover effects and transitions
|
||||
5. Ensure responsive design for different screen sizes
|
||||
2. Turn useful output into a reusable template library:
|
||||
- **What can be saved?**
|
||||
- Any output judged "useful" by operations can be saved in one click:
|
||||
- full combo: main image + title + selling points
|
||||
- partial save: for example title pattern only or copy snippet only
|
||||
- **What can you do after saving?**
|
||||
- **Reuse:**
|
||||
- apply saved template to a new product batch
|
||||
- or generate multiple variants on same product for A/B testing
|
||||
- **Edit:**
|
||||
- edit title/copy directly
|
||||
- if image editing is supported, adjust text/stickers on main image
|
||||
- **Manage:**
|
||||
- name and tag collections (for example "men bag main image template", "campaign title structure"), and optionally categorize by store
|
||||
- **How to use on next launch?**
|
||||
- after importing new products, operations can choose:
|
||||
- default system generation, or
|
||||
- "generate using my saved template"
|
||||
- system applies template structure/style to new product data and outputs new main image + title + selling-point drafts
|
||||
```
|
||||
|
||||
### 4.2 UX Optimization
|
||||
If each test requires manual setup from scratch, testing becomes expensive. In practice we often create **test data entry points** to accelerate full-flow testing. You can ask AI:
|
||||
|
||||
```
|
||||
Please help me improve the user experience:
|
||||
1. Add loading states for all async operations
|
||||
2. Add success/error notifications
|
||||
3. Add confirmation dialogs for destructive actions
|
||||
4. Improve form validation and error messages
|
||||
5. Add keyboard shortcuts for common actions
|
||||
```text
|
||||
I need to test the full user journey and ensure everything works end to end.
|
||||
Please generate test-data shortcuts based on the requirement below so I can quickly validate the entire flow:
|
||||
1. Help operations batch-generate first-round image+copy drafts:
|
||||
- **Input (supports direct upload and batch import):**
|
||||
- Product basic data: name, category, brand, material, size, color, target audience, etc.
|
||||
- Product image: white background / simple scene image
|
||||
- Per generation, support extra upload of historical bestseller screenshots or reference links
|
||||
- Support Excel batch import or online entry/upload
|
||||
- Support a page option for saving product assets to asset library for future use
|
||||
- **Output (directly listable or listable with light edits):**
|
||||
- For each product, one "presentable image draft with basic selling points"
|
||||
- One "well-structured, keyword-rich title" + 1-2 selling-point lines
|
||||
- **Expected workflow change:**
|
||||
Move from creating every batch from scratch to dropping batches into the system, then filtering and fine-tuning generated drafts.
|
||||
```
|
||||
|
||||
### 4.3 Performance Optimization
|
||||
You can quickly get a usable result (and if one case is not enough, ask AI to generate multiple test cases):
|
||||
|
||||
```
|
||||
Please help me optimize performance:
|
||||
1. Lazy load images
|
||||
2. Implement pagination for long lists
|
||||
3. Add debouncing for search inputs
|
||||
4. Optimize bundle size
|
||||
```
|
||||

|
||||
|
||||
---
|
||||
Click to test:
|
||||
|
||||
## Summary
|
||||

|
||||
|
||||
In this chapter, we learned:
|
||||
At this point, the result may appear immediately without a simulated generation process. If you want realistic delay/feedback, ask AI:
|
||||
|
||||
1. **Requirements Analysis**: How to extract real pain points from vague business requirements
|
||||
2. **Single-page Validation**: Quickly validate core functionality with AI IDE
|
||||
3. **Multi-page Expansion**: Expand from single function to complete application
|
||||
4. **Beautification and Optimization**: Make prototypes more professional and user-friendly
|
||||
"Please simulate a real generation process so after clicking, results appear after a short delay."
|
||||
|
||||
**Key Takeaways:**
|
||||

|
||||
|
||||
- Don't start coding immediately — understand requirements first
|
||||
- Start with the simplest version — validate before expanding
|
||||
- Let AI help you — but you make the final decisions
|
||||
- Iterate based on feedback — keep improving
|
||||
Once the generation flow works, verify the template-library behavior. If the "save template" interaction is missing, ask AI:
|
||||
|
||||
In the next chapter, we'll learn how to integrate real AI capabilities into our prototype.
|
||||
"Please ensure requirement 2 works correctly: I can save a generated result as a template, open it, and view generation parameters."
|
||||
|
||||
## 5. 📚 Assignment: Recreate Your Own Douyin Ecommerce Workbench
|
||||
Generation is usually iterative, and screenshots are often needed for correction:
|
||||
|
||||

|
||||
|
||||
Expected final result:
|
||||
|
||||

|
||||
|
||||
Besides manual user-flow testing, you can also ask AI to do requirement coverage checks:
|
||||
|
||||
- "Compare this app against my original requirement. Are all core features covered?"
|
||||
- "Give me a checklist: completed, missing, and weak-experience parts."
|
||||
|
||||
AI will usually return a checklist. Use it to decide whether to continue iterating. After several rounds, you can get a much stronger prototype.
|
||||
|
||||
## 5. 📚 Assignment: Recreate Your Own Douyin E-commerce Workbench
|
||||
|
||||
<el-card shadow="hover" style="margin: 20px 0; border-radius: 12px;">
|
||||
<template #header>
|
||||
<div style="font-weight: bold; font-size: 16px;">🚀 Challenge: Recreate the Ecommerce Asset Workbench</div>
|
||||
<div style="font-weight: bold; font-size: 16px;">🚀 Challenge Task: Recreate an E-commerce Asset Workbench</div>
|
||||
</template>
|
||||
|
||||
<p>
|
||||
Use this chapter's prompts and workflow to complete a full end-to-end loop:
|
||||
Follow this chapter's approach and complete one full loop:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<strong>Full Loop Practice</strong>
|
||||
<strong>Full-loop practice</strong>
|
||||
<ul>
|
||||
<li>Business requirement prompt generation → single-page prototype generation → multi-page prototype generation</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<strong>Share Results</strong>
|
||||
<strong>Share your result</strong>
|
||||
<ul>
|
||||
<li>Take a screenshot of your app and share it with the community</li>
|
||||
<li>Take screenshots of your application and share them with everyone</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<strong>Thinking Exercise</strong>
|
||||
<strong>Thinking question</strong>
|
||||
<ul>
|
||||
<li>Reserve room for next chapter ("Integrating LLM and text-to-image capabilities"), and think ahead: how will you embed AI copywriting, image generation, and script generation in your workbench?</li>
|
||||
<li>Reserve space for next chapter ("Integrating LLM and text-to-image capabilities"). Think in advance: how can your workbench embed AI copywriting, image generation, and script generation?</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
@@ -539,8 +623,14 @@ In the next chapter, we'll learn how to integrate real AI capabilities into our
|
||||
|
||||
## Next Step
|
||||
|
||||
In the next chapter, we will integrate concrete AI capabilities into this content production workbench (text-to-text, image-to-text, and text-to-image), for example:
|
||||
In the next chapter, on top of this content-production workbench, we will integrate concrete AI capabilities (text-to-text, image-to-text, text-to-image), for example:
|
||||
|
||||
- Automatically generate first-draft copy and multiple title options for a content task
|
||||
- Auto-generate first-draft copy and multiple title candidates for a given content task
|
||||
- Auto-generate visual drafts from task descriptions (text-to-image)
|
||||
- Auto-classify and summarize historical content tasks to help plan your next campaign
|
||||
- Auto-classify and summarize historical tasks to help plan the next campaign theme
|
||||
|
||||
<RelatedArticlesSection
|
||||
title="Continue Learning"
|
||||
description="Recommended order: integrate AI capabilities -> complete full project loop -> design engineering."
|
||||
:items="relatedArticles"
|
||||
/>
|
||||
|
||||
@@ -4,7 +4,11 @@ description: 'Integrate real AI capabilities into your existing web prototype: u
|
||||
---
|
||||
|
||||
<script setup>
|
||||
import { relatedArticlesMap } from '@theme/data/relatedArticles'
|
||||
|
||||
const duration = 'About <strong>1 day</strong>'
|
||||
const relatedArticles =
|
||||
relatedArticlesMap['en/stage-1/1.3-integrating-ai-capabilities'] ?? []
|
||||
</script>
|
||||
|
||||
# Beginner Level 4: Injecting AI Capabilities into Your Prototype
|
||||
@@ -54,7 +58,7 @@ Specifically, you need to master the following core elements:
|
||||
6. **Error Handling**: Knowing how to troubleshoot when problems occur (such as incorrect API Key, too many requests, etc.)
|
||||
|
||||
::: info ℹ️ What is an API
|
||||
For a more in-depth explanation of APIs, see the appendix: [Introduction to APIs](/zh-cn/appendix/4-server-and-backend/api-intro).
|
||||
For a more in-depth explanation of APIs, see the appendix: [Introduction to APIs](/en/appendix/4-server-and-backend/api-intro).
|
||||
|
||||
::: warning 🔐 **API Security Notes**
|
||||
The API Key is your "pass" for requesting AI services — it's a secret string used for authentication and billing.
|
||||
@@ -89,7 +93,7 @@ Once you've grasped these concepts, you'll find that whether you're integrating
|
||||
In `1.2 Building Your Prototype`, you already created an interactive prototype. What we need to do next is turn the "AI-like features" in the prototype into real, working capabilities: **when the user clicks a button, the prototype sends a request to an external AI service and displays the returned text.**
|
||||
|
||||
::: info ℹ️ Further Reading on Principles
|
||||
If you want to learn more about the underlying principles, check out the appendix: [Introduction to Large Language Models (LLM)](/zh-cn/appendix/8-artificial-intelligence/llm-principles).
|
||||
If you want to learn more about the underlying principles, check out the appendix: [Introduction to Large Language Models (LLM)](/en/appendix/8-artificial-intelligence/llm-principles).
|
||||
::: details Learn More: What is DeepSeek?
|
||||
|
||||
**Hangzhou DeepSeek Artificial Intelligence Basic Technology Research Co., Ltd.**, operating under the brand name DeepSeek, is a **Chinese artificial intelligence (AI) company that develops large language models (LLMs)**. DeepSeek is headquartered in Hangzhou, Zhejiang, and is owned and funded by the Chinese hedge fund High-Flyer. DeepSeek was founded in July 2023 by Liang Wenfeng, co-founder of High-Flyer, who also serves as CEO of both companies. The company launched its eponymous chatbot and its DeepSeek-R1 model in January 2025.
|
||||
@@ -215,7 +219,7 @@ For more details, refer to the [MiniMax OpenAI Compatible API Documentation](htt
|
||||
# 3. Integrating the Image-to-Text API: Qwen3 VL
|
||||
|
||||
::: info ℹ️ Further Reading on Principles
|
||||
If you want to learn more about the underlying principles, check out the appendix: [Introduction to Vision Language Models (VLM)](/zh-cn/appendix/8-artificial-intelligence/multimodal-models).
|
||||
If you want to learn more about the underlying principles, check out the appendix: [Introduction to Vision Language Models (VLM)](/en/appendix/8-artificial-intelligence/multimodal-models).
|
||||
|
||||
::: details Learn More: What is Qwen3 VL?
|
||||
|
||||
@@ -337,25 +341,158 @@ caption = caption_image(image_path)
|
||||
|
||||
:::
|
||||
|
||||
For image-generation integration, failures can be noisy and hard to debug. Ask AI IDE to display full error details every time (instead of only "generation failed"), for example:
|
||||
In this scenario, we ask the AI IDE directly to implement a feature that automatically generates ecommerce selling points and keywords from uploaded images, using a prompt like the one below:
|
||||
|
||||
```text
|
||||
Based on the image-to-text API below, help us implement a feature that automatically generates ecommerce selling points and keywords from uploaded images.
|
||||
|
||||
<code omitted here; you need to paste your key and the reference code yourself>
|
||||
```
|
||||
|
||||
Final generated result:
|
||||

|
||||
|
||||

|
||||
|
||||
<div style="margin: 50px 0;">
|
||||
<ClientOnly>
|
||||
<StepBar :active="2" :items="[
|
||||
{ title: 'API Basics', description: 'Understand core concepts and security practices' },
|
||||
{ title: 'Text Integration', description: 'DeepSeek text generation hands-on' },
|
||||
{ title: 'Image Integration', description: 'VLM image understanding and generation' }
|
||||
]" />
|
||||
</ClientOnly>
|
||||
</div>
|
||||
|
||||
# 4. Integrating the Image Generation API: Seedream
|
||||
|
||||
In the previous section, we mainly handled text-related tasks. Next, we will try integrating image generation capabilities to support generating images from text descriptions, or editing images.
|
||||
|
||||
::: info ℹ️ Further Reading on Principles
|
||||
If you want to learn more about the underlying principles, check out the appendix: [Introduction to Image Generation](/en/appendix/8-artificial-intelligence/image-generation).
|
||||
|
||||
::: details Learn More: What is [Seedream](https://seed.bytedance.com/en/seedream4_5)?
|
||||
|
||||

|
||||
|
||||
> You may already know Nano Banana (developed by Google), but you should not miss Seedream. Seedream 4.5 is a next-generation image creation model built by ByteDance. It integrates image generation and image editing capabilities into one unified architecture. This enables it to handle complex multimodal tasks such as knowledge-based generation, complex reasoning, and reference consistency. In addition, its inference speed is much faster than the previous generation and it can generate stunning high-definition images up to 4K resolution.
|
||||
>
|
||||
> 
|
||||
> 
|
||||
|
||||
**Main capabilities:**
|
||||
|
||||
- **Text-to-image**: Generate images from text prompts, supporting many styles (realistic, cartoon, ink, cyberpunk, etc.)
|
||||
- **Style transfer**: Convert an image into a specified artistic style
|
||||
- **Image variants**: Generate new images in similar styles from reference images
|
||||
- **Resolution enhancement**: Improve image clarity and detail
|
||||
- **Image editing**: Edit existing images through natural-language instructions
|
||||
|
||||
**Why choose Seedream?**
|
||||
|
||||
- **Stable domestic network access**: Fast access and low latency in China
|
||||
- **Excellent output quality**: Reliable performance in ecommerce and asset-generation scenarios
|
||||
- **Chinese-optimized understanding**: Better understanding of Chinese prompts for domestic users
|
||||
- **Fast speed**: High generation efficiency and short response times
|
||||
- **Stable quality**: Can generate high-definition images up to 4K
|
||||
|
||||
**Typical use cases:**
|
||||
|
||||
- Ecommerce: Generate main images, detail-page assets, and promotional posters
|
||||
- Social media: Generate avatars, stickers, and supporting visuals
|
||||
- Design: Quickly produce concept images, assets, and backgrounds
|
||||
- Marketing: Create ad images, campaign banners, and holiday posters
|
||||
|
||||
**How it works with Qwen3 VL:**
|
||||
|
||||
These two APIs can be chained together: first use Qwen3 VL to analyze a reference image and understand scene content, then use Seedream to generate new images based on prompts derived from that analysis.
|
||||
:::
|
||||
|
||||
Many "AI posters / AI product main images / AI character images" you see on Douyin, Bilibili, or YouTube are fundamentally built with this kind of technology. What you need to do is simple: organize user input into one sentence, request the image API, and display the returned image. The model used here is an image generation / image editing model.
|
||||
|
||||
We will demonstrate step by step how to integrate the Seedream API into your project (with AI IDE assistance).
|
||||
|
||||
After visiting the [homepage](https://www.volcengine.com/experience/ark?launch=seedream), click login.
|
||||
|
||||

|
||||
|
||||
After logging in, find the recharge (top-up) option in the top-right corner.
|
||||
|
||||

|
||||
|
||||
Real-name verification is required before recharge.
|
||||
|
||||

|
||||
|
||||
After verification succeeds, you can [recharge 1 RMB for testing](https://console.volcengine.com/finance/fund/recharge).
|
||||
|
||||
Return to the [initial page](https://www.volcengine.com/experience/ark?launch=seedream) and click API Access.
|
||||
|
||||

|
||||
|
||||
First, create an API key, then click the model selection option.
|
||||
|
||||

|
||||
|
||||
This takes you to step 2. Here, confirm the service model is Seedream 4.5 and copy the provided call example. (The screenshot was taken earlier, so the model version shown there is still 4.0.)
|
||||
|
||||

|
||||
|
||||
Once the API Key and call example are ready, you can paste them directly into the AI IDE and ask it to generate a frontend interactive demo or integrate the capability into your current prototype. Notice that in the screenshot you can choose text-to-image or multi-image-to-single-image mode. Select the reference code according to your specific requirement.
|
||||
|
||||
::: warning ⚠️ Important note
|
||||
The default example here is relatively complex. Remember to disable **"Add watermark"** and **"Streaming response"** (that is, make sure the request body uses `"watermark": false` and `"stream": false`) so that no watermark is generated and requests do not fail.
|
||||
:::
|
||||
|
||||
Because we will use the reference-image generation mode later, we start with the multi-image-to-single-image feature. The copied reference code looks like this:
|
||||
|
||||
```text
|
||||
curl -X POST https://ark.cn-beijing.volces.com/api/v3/images/generations \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer xxxxxxx" \
|
||||
-d '{
|
||||
"model": "doubao-seedream-4-5-251128",
|
||||
"prompt": "将图1的服装换为图2的服装",
|
||||
"image": ["https://ark-project.tos-cn-beijing.volces.com/doc_image/seedream4_imagesToimage_1.png", "https://ark-project.tos-cn-beijing.volces.com/doc_image/seedream4_imagesToimage_2.png"],
|
||||
"sequential_image_generation": "disabled",
|
||||
"response_format": "url",
|
||||
"size": "2K",
|
||||
"stream": false,
|
||||
"watermark": true
|
||||
}'
|
||||
```
|
||||
|
||||
With the image reference code prepared, we ask the AI IDE to support common image-task features in ecommerce:
|
||||
|
||||
```text
|
||||
Please help me implement common ecommerce features in this project based on the API below (for example, poster generation, Douyin ecommerce hero-image generation, etc.)
|
||||
|
||||
<paste the API KEY and the image-editing code here>
|
||||
```
|
||||
|
||||
Implementation result:
|
||||
|
||||

|
||||
|
||||
Note that image generation often fails in unexpected ways. We recommend asking the AI IDE to always show the full error details so you can copy them and debug effectively. For example, you can say:
|
||||
|
||||
```text
|
||||
Don't only show "image generation failed." Please always display the full failure reason, such as model mismatch, request errors, or timeout details.
|
||||
```
|
||||
|
||||
If changes are not reflected in the page after multiple attempts, ask AI IDE to restart the project.
|
||||
Sometimes updates after edits may still not be reflected on the page. If you keep seeing errors after multiple rounds, you can also try telling the AI IDE directly: please restart this project.
|
||||
|
||||
In ecommerce scenarios, you may want uploaded clothes to be automatically "worn" by virtual models, or automatically generate attractive product posters and promotional visuals. Here is an example prompt for generating an ecommerce poster:
|
||||
In ecommerce scenarios, we may want clothes uploaded by users to be automatically worn by a model, or automatically generate attractive product sales images and posters. Here we try a prompt that asks for an ecommerce poster:
|
||||
|
||||

|
||||
|
||||
You can combine text-to-image and image-to-image APIs based on your own business scenario ideas.
|
||||
|
||||
## 4. More Image Service Options
|
||||
## More Image Service Options
|
||||
|
||||
Below are additional choices. It's recommended to first run through a working Qwen image generation result, then replace with another service based on quality and cost.
|
||||
|
||||
### 4.1 Recraft Integration
|
||||
### Recraft Integration
|
||||
|
||||
If your prototype is more design-production oriented (for example brand-style illustrations, marketing posters, vector-style assets), Recraft is often a better fit. The integration method is exactly the same: **get a Key + find official examples + let AI IDE wire them into your page/button**.
|
||||
|
||||
@@ -380,7 +517,7 @@ Then follow the same process and use official documentation examples:
|
||||
|
||||
:::
|
||||
|
||||
### 4.2 Qwen Image / Qwen Image Edit Integration
|
||||
### Qwen Image / Qwen Image Edit Integration
|
||||
|
||||
If you want a relatively simple way to integrate image generation, Qwen Image is also a good choice. The approach is unchanged: treat it as an image API and connect it to your prototype button.
|
||||
|
||||
@@ -653,3 +790,9 @@ In the next chapter, we will connect these separate AI capabilities into one com
|
||||
- Connect content planning, product listing, and data analysis into one end-to-end workflow
|
||||
- Embed this chapter's AI capabilities (LLM copywriting, text-to-image, image editing) into concrete business nodes
|
||||
- Build a truly usable "Ecommerce AI Workbench" instead of isolated demos
|
||||
|
||||
<RelatedArticlesSection
|
||||
title="Related Articles"
|
||||
description="A recommended learning path from single-point AI capabilities to complete product workflows."
|
||||
:items="relatedArticles"
|
||||
/>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -316,7 +316,7 @@ This document summarizes **LLM large model applications in B-End enterprise scen
|
||||
<el-card shadow="hover" style="margin-top: 16px; margin-bottom: 24px; border-left: 5px solid #409EFF;">
|
||||
<div style="font-weight: 600; margin-bottom: 8px;">Find the application scenario suitable for you</div>
|
||||
<div style="color: #606266; font-size: 14px; line-height: 1.6; margin-bottom: 12px;">
|
||||
Select your interest direction and purpose, the system will recommend related industry directions. Click on tags to jump to corresponding chapters.
|
||||
Select your interest direction and target purpose. The system recommends related industry scenarios. Click a row to jump to the corresponding chapter.
|
||||
</div>
|
||||
<el-row :gutter="16">
|
||||
<el-col :span="12">
|
||||
@@ -348,24 +348,43 @@ This document summarizes **LLM large model applications in B-End enterprise scen
|
||||
</el-row>
|
||||
|
||||
<div v-if="recommendationTopics.length > 0" style="margin-top: 16px;">
|
||||
<div style="font-weight: 600; margin-bottom: 12px; color: #409EFF;">
|
||||
Recommended {{ currentSelection.interest }} × {{ currentSelection.purpose }} industries:
|
||||
<div style="font-weight: 600; margin-bottom: 10px; color: #409EFF;">
|
||||
{{ recommendationTopics.length }} recommended scenarios for you
|
||||
<span style="font-weight: normal; color: #909399; font-size: 13px; margin-left: 8px;">
|
||||
({{ currentSelection.interest }} + {{ currentSelection.purpose }})
|
||||
</span>
|
||||
</div>
|
||||
<div style="display: flex; flex-wrap: wrap; gap: 8px;">
|
||||
<el-tag
|
||||
v-for="topic in recommendationTopics"
|
||||
:key="topic.title"
|
||||
type="primary"
|
||||
effect="light"
|
||||
style="cursor: pointer; margin-bottom: 4px;"
|
||||
@click="scrollToAnchor(topic.industryAnchor)"
|
||||
<el-table
|
||||
:data="recommendationTopics"
|
||||
style="width: 100%; cursor: pointer;"
|
||||
@row-click="(row) => scrollToAnchor(row.industryAnchor)"
|
||||
highlight-current-row
|
||||
>
|
||||
{{ topic.title }}
|
||||
</el-tag>
|
||||
<el-table-column prop="title" label="Application Scenario" min-width="300">
|
||||
<template #default="scope">
|
||||
<div style="font-weight: 500; color: #303133;">{{ scope.row.title }}</div>
|
||||
<div style="font-size: 12px; color: #909399; margin-top: 4px;">{{ scope.row.desc }}</div>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="industryName" label="Industry" width="180" align="center">
|
||||
<template #default="scope">
|
||||
<el-tag type="info" effect="light" size="small">{{ scope.row.industryName }}</el-tag>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
<div style="margin-top: 10px; font-size: 12px; color: #909399;">
|
||||
💡 Click any row in the table to jump to the corresponding industry section
|
||||
</div>
|
||||
<el-button type="text" size="small" @click="resetSelection" style="margin-top: 8px;">
|
||||
Reset Selection
|
||||
</el-button>
|
||||
</div>
|
||||
|
||||
<div v-else-if="!interestPoint || !purpose" style="margin-top: 14px; color: #909399; font-size: 13px;">
|
||||
<span v-if="!interestPoint && !purpose">💡 Please select both interest direction and purpose</span>
|
||||
<span v-else-if="!interestPoint">💡 Please select an interest direction</span>
|
||||
<span v-else>💡 Please select a purpose</span>
|
||||
</div>
|
||||
|
||||
<div v-if="interestPoint || purpose" style="margin-top: 12px;">
|
||||
<el-button size="small" @click="resetSelection">Reset Selection</el-button>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
@@ -411,6 +430,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Technical Documentation Auto-Generation System | LLM auto-generates product specifications, operation manuals; supports multi-format export |
|
||||
| 4 | Production Equipment Inspection Report Auto-Generation | Voice input describes equipment status; structured inspection report auto-generated |
|
||||
| 5 | Industrial Equipment Fault Diagnosis Q&A | Builds vector knowledge base from historical fault cases; provides intelligent diagnosis suggestions |
|
||||
| 6 | LLM Information-Retrieval Data Warehouse | Uses Text-to-SQL to convert natural-language queries into database queries; Superset visualizes results; Doris or ClickHouse as OLAP engine |
|
||||
| 7 | Industrial Equipment Fault-Diagnosis Knowledge Q&A Assistant | Builds a vector knowledge base from historical fault cases; LLM provides diagnosis suggestions and solution plans based on fault descriptions |
|
||||
| 8 | Production Quality Inspection Report Generation and Defect Classification | OCR identifies defects in inspection photos; LLM generates structured quality reports and classifies defect type and severity |
|
||||
| 9 | Inventory Counting Assistant and Inventory Report Generation | Inputs stocktaking data; LLM compares with system inventory and generates discrepancy reports with abnormal-inventory alerts |
|
||||
| 10 | Process Optimization Suggestion Intelligent Q&A System | Builds a RAG knowledge base from process documents; LLM provides optimization suggestions based on production issues |
|
||||
|
||||
---
|
||||
|
||||
@@ -425,6 +449,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Enterprise Internal Knowledge Intelligent Q&A | Builds vector knowledge base from internal documents; provides precise Q&A service for employees |
|
||||
| 4 | Customer Service Conversation Smart Summary | Automatically generates conversation summaries; extracts key information and creates follow-up tickets |
|
||||
| 5 | Golden Script Recommendation Knowledge Base | Analyzes excellent service cases; extracts golden scripts for team sharing and training |
|
||||
| 6 | Customer Service Script Compliance Auto-Check Assistant | Customer-service staff input reply drafts; LLM checks script compliance and sensitive words in real time and provides revision suggestions |
|
||||
| 7 | Customer Service Ticket Auto-Summary and Classification Tool | LLM summarizes long conversations and auto-classifies tags; Elasticsearch supports full-text ticket search |
|
||||
| 8 | Customer Emotion Monitoring and Abnormality Alert Tool | Real-time analysis of voice tone and text sentiment; LLM identifies abnormal emotions and triggers alerts with WebSocket push |
|
||||
| 9 | Golden Script Recommendation Knowledge-Base System for Customer Service | LLM analyzes excellent customer-service conversations, refines high-performing templates, and recommends scripts based on context |
|
||||
| 10 | Intelligent Outbound-Call Conversation Analysis and QA Assistant | After outbound-call recording transcription, LLM extracts key information; automatically generates QA reports and improvement suggestions |
|
||||
|
||||
---
|
||||
|
||||
@@ -439,6 +468,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Homework Auto-Grading & Learning Diagnosis | OCR recognizes handwritten answers; AI provides grading and improvement suggestions |
|
||||
| 4 | Job Competency Model & Learning Map | Analyzes job requirements; generates competency models and corresponding learning paths |
|
||||
| 5 | Foreign Language Oral Practice with AI | LLM plays role-play partners; simulates various real-life scenarios for speaking practice |
|
||||
| 6 | School-Based Curriculum Construction and Courseware Production Tool | LLM analyzes school characteristics and student needs to generate curriculum frameworks; integrates PPT generation APIs for automatic courseware creation |
|
||||
| 7 | College-Application Recommendation and Career Planning Platform | LLM analyzes candidate scores, ranking, interests, and other factors, then combines admissions data to recommend schools and majors |
|
||||
| 8 | Youth Programming Code Assistant | LLM explains code logic and provides coding guidance; supports switching between block languages and Python |
|
||||
| 9 | Knowledge-Point Mind Map Auto-Generation and Learning-Path Recommendation Tool | Input course topics; LLM automatically generates knowledge maps and recommends next-step learning content based on progress |
|
||||
| 10 | Chinese/English Essay Auto-Scoring and Correction Engine | LLM scores essays across dimensions such as ideas, structure, language, and diversity, and generates annotations with comparisons to high-quality sample essays |
|
||||
|
||||
---
|
||||
|
||||
@@ -453,6 +487,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Unit Test Auto-Generation | Analyzes source code structure; generates boundary condition test cases automatically |
|
||||
| 4 | Code Quality Analysis Tool | Analyzes code complexity, security vulnerabilities; provides optimization recommendations |
|
||||
| 5 | UI Code Auto-Generation from Design | Uploads design draft images; AI generates responsive HTML/CSS code |
|
||||
| 6 | Natural Language to SQL Auto-Generation Tool | LLM converts natural-language data requests to SQL and supports complex multi-table joins and aggregation queries |
|
||||
| 7 | API Automated Testing and Documentation Generation Platform | LLM analyzes code comments and API definitions, auto-generates test cases and API docs, and integrates Postman for test execution |
|
||||
| 8 | System Log Analysis and Fault Localization | ELK Stack collects log data; LLM extracts key anomaly information and locates root causes, then recommends fixes |
|
||||
| 9 | Frontend UI Code Auto-Generation Tool | OCR recognizes layout structures from design images; LLM generates responsive CSS and component code with TailwindCSS integration |
|
||||
| 10 | Intelligent Database Schema Design and Modeling Assistant | Input business requirement docs to LLM to auto-generate ER diagrams and schema definitions; supports exporting MySQL/PostgreSQL DDL scripts |
|
||||
|
||||
---
|
||||
|
||||
@@ -467,6 +506,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Clinical Research Data Analysis Platform | Integrates EMR data; assists in generating statistical analysis code for research |
|
||||
| 4 | Medical Imaging Report Auto-Generation | Describes imaging features; generates structured medical imaging reports |
|
||||
| 5 | Chronic Disease Medication Reminder | Generates personalized medication plans; supports drug interaction and contraindication checks |
|
||||
| 6 | Drug Package-Insert Intelligent Q&A Assistant | Upload package-insert images or input drug names; LLM answers dosage, side effects, and precautions |
|
||||
| 7 | Disease Knowledge Popular-Science Article Generator | Input disease name and audience type; LLM generates easy-to-understand educational content and supports multiple versions |
|
||||
| 8 | Medical Imaging Report Auto-Generation Tool | Radiologists describe imaging features; LLM auto-generates structured report content and supports common exam templates |
|
||||
| 9 | Surgical Record Intelligent Generation and Archiving Assistant | Voice input records key surgical steps; LLM generates structured surgical records and auto-links surgery codes |
|
||||
| 10 | Chronic Disease Medication Reminder Intelligent Assistant | Patients input medication lists; LLM generates personalized reminders and supports contraindication checking and interactive Q&A |
|
||||
|
||||
---
|
||||
|
||||
@@ -481,6 +525,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Security Operations Daily Report | Aggregates security logs; automatically extracts and generates daily reports |
|
||||
| 4 | Penetration Test Report Generation | Inputs vulnerability descriptions; AI generates complete penetration test reports |
|
||||
| 5 | Threat Intelligence Analysis Assistant | Connects to threat intelligence sources; interprets and analyzes potential threats |
|
||||
| 6 | Malicious Code Protection and Privacy Compliance Monitoring | Sandboxes suspicious-file behavior; LLM identifies malicious features and generates signatures; scans for sensitive data exposure |
|
||||
| 7 | Security Configuration Compliance Checklist Generation Tool | Input target system type; LLM generates configuration checklists supporting standards such as MLPS 2.0 and CIS |
|
||||
| 8 | Threat Intelligence Intelligent Query and Analysis Assistant | Connects multi-source threat intelligence (open-source/commercial); LLM interprets intelligence and links it with enterprise assets |
|
||||
| 9 | Security Incident Postmortem Report Generation Assistant | After incidents, LLM auto-generates timeline-based postmortem reports with root-cause analysis and remediation suggestions |
|
||||
| 10 | Global Threat Intelligence Monitoring and Alert Center | Crawlers collect global security news and vulnerability disclosures; LLM extracts key information, assesses impact, and sends alerts |
|
||||
|
||||
---
|
||||
|
||||
@@ -495,6 +544,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | IPO Prospectus Generation & Compliance Check | Uses modular templates; auto-fills business descriptions with compliance verification |
|
||||
| 4 | Financial Report & Anomaly Warning | Auto-generates financial analysis reports; monitors business anomalies in real-time |
|
||||
| 5 | Insurance Agent Practice Coach | Simulates customer scenarios; evaluates script compliance and persuasion skills |
|
||||
| 6 | Compliance Case Intelligent Retrieval and Q&A Assistant | Builds knowledge bases from regulatory penalty cases; LLM answers compliance questions and provides relevant case references |
|
||||
| 7 | Insurance Agent Intelligent Script Practice | LLM plays different customer personas for simulation and evaluates script compliance and persuasion with transcription analysis |
|
||||
| 8 | Insurance Product Clause Analysis and Competitor Comparison Platform | Parses clauses structurally; LLM generates feature summaries and key cautions |
|
||||
| 9 | Customer Script Emotion Recognition Service | Combines voice-emotion recognition with script-compliance checks and gives real-time coaching suggestions |
|
||||
| 10 | Insurance Claim Progress Intelligent Query and Dialogue Assistant | Users input policy or case numbers; LLM queries claim status and answers claim-related questions |
|
||||
|
||||
---
|
||||
|
||||
@@ -509,6 +563,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Marketing Content Auto-Generation | Generates marketing copy, social media posts, and advertising scripts |
|
||||
| 4 | Competitor Ad Analysis Platform | Collects and analyzes competitor advertising strategies |
|
||||
| 5 | Hot Topic Analysis & Content Recommendation | Analyzes trending topics; recommends content creation angles |
|
||||
| 6 | Resume Intelligent Parsing and Job Matching System | Parses resume PDFs to extract key information; LLM matches suitable roles and generates interview suggestions; integrates with ATS systems |
|
||||
| 7 | Employee Onboarding Guidance and Q&A Assistant | Uses RAG retrieval over onboarding docs; LLM answers common new-hire questions |
|
||||
| 8 | Employee Performance Feedback and OKR Management Platform | Collects OKR data; LLM analyzes goal completion and generates feedback suggestions with 360-feedback integration |
|
||||
| 9 | Intelligent Meeting Minutes and To-Do Management | Transcribes meeting recordings; LLM extracts key points and action items; auto-creates tasks in task systems |
|
||||
| 10 | Invoice Recognition and Expense Reimbursement Auto-Processing | OCR recognizes invoice fields and automatically checks authenticity and reimbursement compliance; integrates with finance systems |
|
||||
|
||||
---
|
||||
|
||||
@@ -523,6 +582,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Digital Human Live Streaming System | Creates digital human anchors; generates real-time dialogue for live streaming |
|
||||
| 4 | Short Video Script & Editing | Generates short video scripts; provides intelligent editing suggestions |
|
||||
| 5 | Marketing Content Design System | Generates advertising copy and designs marketing materials |
|
||||
| 6 | Intelligent Marketing Content Generation and Design System | Input product information; LLM generates marketing copy and selling-point extraction; integrates with template-design tools |
|
||||
| 7 | Multi-Platform Ad ROI Real-Time Monitoring and Strategy Optimization System | Connect ad-platform APIs for data collection; LLM analyzes performance and generates optimization suggestions with anomaly alerts |
|
||||
| 8 | Search-Engine Keyword and Traffic Analysis | Collects keyword-tool data; LLM analyzes trends and competition and recommends topic directions |
|
||||
| 9 | Competitor Ad Placement Analysis Platform | Uses third-party data APIs to collect competitor ads; LLM analyzes placement strategy and creative patterns |
|
||||
| 10 | Full-Network Hot Topic Analysis and Content Recommendation System | Collects trending data; LLM analyzes trend shifts and recommends content angles with calendar scheduling |
|
||||
|
||||
---
|
||||
|
||||
@@ -537,6 +601,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Enterprise Policy Matching Platform | Analyzes enterprise profiles; intelligently matches applicable support policies |
|
||||
| 4 | Approval Materials Pre-Review | OCR recognizes application materials; automatically checks completeness |
|
||||
| 5 | City Grid Event Management | Identifies event types from reports; intelligently dispatches to responsible departments |
|
||||
| 6 | Social Sentiment Big-Data Analysis and Risk Early Warning System | Fuses multiple sources such as hotlines, online sentiment, and field visits; LLM identifies risk hotspots |
|
||||
| 7 | Government Archive Digitization Recognition and Intelligent Filing Platform | OCR recognizes archive text; LLM extracts key information and auto-classifies; supports full-text retrieval |
|
||||
| 8 | Emergency Command and Rescue Resource Intelligent Dispatch Platform | Collects emergency-event data; LLM generates emergency response plans with resource-dispatch optimization |
|
||||
| 9 | Grid-Based Atmospheric Pollution Monitoring and Precision Traceability System | Collects air-quality sensor data; CV identifies pollution sources; LLM analyzes trends and traces causes |
|
||||
| 10 | Public-Safety Incident Intelligent Risk Warning Assistant | Integrates historical events and real-time reports; LLM estimates risk levels and outputs warning recommendations |
|
||||
|
||||
---
|
||||
|
||||
@@ -551,6 +620,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Legal Regulation Change Monitoring | Monitors regulatory updates; analyzes impact on business operations |
|
||||
| 4 | Legal Letter Auto-Drafting | Inputs case facts; AI generates standard legal letters |
|
||||
| 5 | Legal Terms Plain Language Explanation | Translates complex legal terms into easy-to-understand language |
|
||||
| 6 | Courtroom Recording Real-Time Transcription and Dispute-Focus Extraction Recorder | ASR transcribes hearing audio; LLM extracts dispute focuses and key arguments with timestamps |
|
||||
| 7 | Full-Network IP Infringement Clue Monitoring and Blockchain Evidence Preservation System | Monitors e-commerce and social media infringement; automatically collects and preserves evidence |
|
||||
| 8 | LLM-Based IPO Prospectus Key-Data Consistency Check and Risk Alert Agent | Compares data across prospectus sections; LLM identifies inconsistencies and abnormal values with risk tags |
|
||||
| 9 | Complex Legal Clause "Translation" Plugin in Plain Language | Users select legal clauses and LLM outputs understandable explanations |
|
||||
| 10 | Case Evidence-Chain Intelligent Structuring and Visualization System | Upload evidence materials; LLM analyzes evidence relationships and timelines |
|
||||
|
||||
---
|
||||
|
||||
@@ -565,6 +639,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Visa Materials Pre-Review | OCR recognizes visa materials; automatically checks for completeness |
|
||||
| 4 | Real-Time Translation for Travel | Offline voice translation; recognizes and translates menu images abroad |
|
||||
| 5 | Travel Notes Auto-Generation | Extracts information from travel photos; generates shareable travel journals |
|
||||
| 6 | Data-Driven Hotel "Pitfall Avoidance" Analyzer Based on Real Reviews | Collects hotel review data; LLM extracts positive and negative keyword patterns |
|
||||
| 7 | Immersive Destination VR Preview and Virtual Room Selection Platform | Collects 360-degree panoramas; VR enables immersive previews and virtual room tours |
|
||||
| 8 | Travel Footprint Auto-Generated Travel Notes and Social Copy Assistant | Extracts time/location metadata from photos; LLM generates travel notes with template-based layout |
|
||||
| 9 | Enterprise Travel Invoice Aggregation and Compliance Reimbursement Management Platform | Connects travel-platform APIs for automatic invoice collection and compliance checks |
|
||||
| 10 | Scenic-Area Crowd Congestion Prediction and Off-Peak Route Navigation | Collects scenic-area crowd data; ML predicts congestion windows and recommends off-peak routes |
|
||||
|
||||
---
|
||||
|
||||
@@ -579,6 +658,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Cognitive Training for Elderly | Provides cognitive games; uses old photos to trigger memory for dementia patients |
|
||||
| 4 | Social Anxiety Practice Coach | Creates virtual social scenarios; helps practice social interactions |
|
||||
| 5 | Mood Monitoring & Incentive Assistant | Analyzes mood patterns; generates positive encouragement content |
|
||||
| 6 | Generative AI Customized Bedtime Story Machine for Children | Parents input themes/preferences; LLM generates customized stories with background music support |
|
||||
| 7 | Deceased Digital-Life Reconstruction and LLM Cross-Time Dialogue System | Trains personalized models from pre-death voice/text data and generates memory-based conversations |
|
||||
| 8 | MBTI-Based AI Personality Mirror and Empathetic Chatbot | Inputs MBTI results; LLM outputs personality analysis and empathetic responses with match suggestions |
|
||||
| 9 | Privacy-Protected AI Confession Tree-Hole for Teenagers | Anonymous channel for emotional expression; LLM provides listening/suggestions with sensitive-word alerts |
|
||||
| 10 | Self-Evolving AI Virtual Pet Growth System | Trains pet personality models and supports interaction-driven growth and virtual customization |
|
||||
|
||||
---
|
||||
|
||||
@@ -593,6 +677,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Interactive Novel Story Generator | Reader choices affect story development |
|
||||
| 4 | Esports Game Analysis & Commentary | Real-time game analysis with AI-powered commentary |
|
||||
| 5 | Audiobook Auto-Generation | Converts text to audio with character-specific voices |
|
||||
| 6 | Personalized Humor Content Recommendation Algorithm Engine | Builds user-interest profiles and recommends matching humor content |
|
||||
| 7 | AI Smart Vocal Tuning and KTV Voice Enhancement Software | Performs denoising and vocal enhancement with AI tuning algorithms |
|
||||
| 8 | Film/TV Character-Centric Plot Extraction and Editing Tool | Analyzes video content, extracts character-related clips, and auto-generates edited cuts |
|
||||
| 9 | Multi-Role TTS Audiobook Auto-Generation System | Assigns text roles and generates personalized voices with background music/effects |
|
||||
| 10 | Board-Game Reinforcement-Learning Review Coach | Analyzes game records, simulates AI opponents, and generates review suggestions |
|
||||
|
||||
---
|
||||
|
||||
@@ -607,6 +696,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Multi-Language Translation | Localizes product descriptions for international markets |
|
||||
| 4 | Digital Human Live Streaming | AI-powered virtual streamers for 24/7 live commerce |
|
||||
| 5 | Trend Analysis & Product Selection | Analyzes market trends; suggests trending products to sell |
|
||||
| 6 | Full-Network Same-Product AI Price Comparison and Trend Prediction Plugin | Crawls e-commerce prices, displays comparison charts, and predicts price trends |
|
||||
| 7 | Buyer-Show Image AI Selection and Short-Video Synthesis Platform | Scores buyer-show images, auto-recommends high-quality content, and synthesizes short videos from templates |
|
||||
| 8 | LLM-Based Real-Time Sales Dialogue Voice Analysis and Golden-Script Recommendation | ASR transcribes calls and performs real-time script compliance checks with recommendation output |
|
||||
| 9 | Market Trend AI Insight and Best-Seller Prediction Engine | Collects and analyzes social media and e-commerce data; LLM identifies trend hotspots and recommends product choices |
|
||||
| 10 | Private-Domain User Profiling AI Clustering and Precision Operations System | Clusters user behavior data, generates profile tags, and triggers automated marketing flows |
|
||||
|
||||
---
|
||||
|
||||
@@ -621,6 +715,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Electricity Price Prediction | ML predicts spot prices; generates trading strategies |
|
||||
| 4 | Carbon Emission Calculation | Auto-calculates enterprise carbon footprint; generates ESG reports |
|
||||
| 5 | Grid Load Prediction | Predicts grid load under extreme weather; generates dispatch plans |
|
||||
| 6 | Gas-Station Violation AI Video Recognition and Alert Guard | Analyzes surveillance video and detects violations (calling/smoking, etc.) with alert pushes |
|
||||
| 7 | Long-Distance Oil/Gas Pipeline Leak Acoustic AI Monitoring and Precision Positioning System | Collects acoustic-sensor data for leak detection and localization algorithms |
|
||||
| 8 | Virtual Power Plant Resource Aggregation and AI Power-Trading Decision System | Connects distributed resources for aggregated optimization dispatch and strategy execution |
|
||||
| 9 | Mine Personnel AI Position Tracking and Dangerous-Area Intrusion Alarm | Uses UWB/Bluetooth positioning for trajectory tracking and geofenced danger-zone alerts |
|
||||
| 10 | Energy-Storage Battery Health AI Assessment and Thermal-Runaway Warning | Monitors battery runtime data, evaluates health status, and triggers thermal-risk alerts |
|
||||
|
||||
---
|
||||
|
||||
@@ -635,6 +734,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Video Restoration & Colorization | 4K super-resolution; AI adds color to black and white footage |
|
||||
| 4 | Text-to-Speech with Emotion | Generates natural-sounding speech with emotional expression |
|
||||
| 5 | Meeting Transcription | Multi-speaker voice separation; generates meeting transcripts with action items |
|
||||
| 6 | Video Object Removal AI Engine | Uses object tracking and inpainting to remove unwanted objects with frame-level consistency |
|
||||
| 7 | Copyright-Safe Background Music AIGC Auto-Composer | Uses music-generation models with controllable emotional style and copyright checks |
|
||||
| 8 | Specific-Person Voice Clone and Voice Conversion Software | Trains timbre models from small voice samples and supports voice conversion |
|
||||
| 9 | One-Click Script-to-Storyboard and AI Dynamic Preview Video Platform | Parses scripts into storyboards and auto-generates previsualization videos |
|
||||
| 10 | Meeting Recording AI Smart Transcription and Core To-Do Extraction Assistant | Performs multi-speaker transcription and LLM-based to-do extraction with timestamps |
|
||||
|
||||
---
|
||||
|
||||
@@ -649,6 +753,11 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Logo & Brand Design | Generates brand logos; creates complete VI systems |
|
||||
| 4 | Trend Analysis & Content Ideas | Tracks trending topics; suggests marketing angles |
|
||||
| 5 | Video Script Generator | Generates short video scripts with shooting suggestions |
|
||||
| 6 | Competitor Marketing Strategy Deep Analysis and AI Weekly Report Generator | Collects/analyzes competitor content, extracts strategy insights, and auto-generates weekly reports |
|
||||
| 7 | Search-Engine Keyword AI Layout and Traffic Article Batch Writing | Analyzes keywords, generates articles at scale, and gives SEO optimization recommendations |
|
||||
| 8 | Personalized Marketing Email AI Writing Expert | Uses user-profile data for personalized content generation with A/B testing |
|
||||
| 9 | Brand Reputation Full-Network Monitoring and Crisis AI Alert Radar | Collects network sentiment data, runs sentiment analysis, and pushes crisis alerts |
|
||||
| 10 | Short-Video Script Creative AIGC Generation and Storyboard Guidance Assistant | Inputs themes and outputs scripts, storyboards, and practical shooting guidance |
|
||||
|
||||
---
|
||||
|
||||
@@ -663,3 +772,8 @@ Learners can choose directions based on these dimensions:
|
||||
| 3 | Data Quality Monitoring | Detects data anomalies; suggests fixes |
|
||||
| 4 | Report Generator | Creates reports and dashboards through conversation |
|
||||
| 5 | Metric Q&A Assistant | Answers questions about data metric definitions and calculations |
|
||||
| 6 | Intelligent Data-Report Interpretation and Trend Analysis Assistant | Upload report images or input data; VLM interprets chart content and analyzes trends |
|
||||
| 7 | Intelligent DB-Schema Interpretation and Query-Example Generation Assistant | Input table names or field descriptions; LLM generates schema explanations and sample SQL |
|
||||
| 8 | Enterprise Master-Data Intelligent Alignment and AI Dedup Governance | Matches master data across sources, identifies duplicates, and supports merge-rule configuration |
|
||||
| 9 | Data Requirement Doc to Test-Case Intelligent Conversion Tool | Input data requirement descriptions; LLM generates test scenarios and validation test cases |
|
||||
| 10 | Data Metric-Definition Intelligent Q&A Assistant | Builds a knowledge base from metric-definition docs; LLM answers definition and calculation logic questions |
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,3 @@
|
||||
# Build Your First Modern App: Full-Stack Application
|
||||
|
||||
> This chapter is currently being written. Stay tuned...
|
||||
@@ -0,0 +1,3 @@
|
||||
# Assignment 2: Modern Frontend Component Library + Trae Practice
|
||||
|
||||
> This chapter is currently being written. Stay tuned...
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,178 @@
|
||||
# Using LLMs to Write API Code and API Documentation
|
||||
|
||||
In the previous chapters, we learned how to use tools like Figma to create UI drafts, how to use AI to quickly generate static frontend pages, and how to use Supabase to build databases and basic authentication. That naturally leads to a new question: when someone clicks those interactive buttons on the frontend, how does the data actually get stored in Supabase? And when we need more complex business logic such as concurrent payments, scheduled push notifications, or sensitive data processing, is it still safe to let the frontend talk directly to the database?
|
||||
|
||||
That question introduces one of the most important parts of modern web architecture: the **backend API**.
|
||||
|
||||
In the past, backend developers often wrote hundreds or thousands of lines of routing, controller, and validation logic by hand. Today, we can hand much of that repetitive scaffolding to large language models. In this chapter, we will move beyond vague "AI-generated code" and look at a real workflow for using strong prompts to guide an LLM into writing solid Node.js backend interfaces, plus the corresponding documentation and test cases.
|
||||
|
||||
> 💡 **Prerequisites**
|
||||
>
|
||||
> Before starting this chapter, it helps to understand:
|
||||
> - [From Database to Supabase](../2.2-database-supabase/) for basic database and data-model concepts
|
||||
> - [Git and GitHub Workflow](../2.4-git-workflow/) for project collaboration and version control
|
||||
> - [What Is the Terminal / Command Line](/en/appendix/2-development-tools/command-line-shell) for project initialization and startup commands
|
||||
|
||||
# What you will learn
|
||||
|
||||
1. **What an API is**: Understand the bridge between frontend and backend, plus basic RESTful design.
|
||||
2. **How LLMs help service construction**: Use structured prompts to generate a clean Node.js + Express starter project.
|
||||
3. **Interface logic development**: Guide the model to generate CRUD APIs with proper business validation and Supabase integration.
|
||||
4. **Automatic API documentation**: Ask the model to reverse-generate OpenAPI/Swagger docs from your code.
|
||||
5. **Testing and integration loops**: Use the model to create Postman collections and Jest unit tests to protect code quality.
|
||||
|
||||
---
|
||||
|
||||
# 1. Why do we need APIs?
|
||||
|
||||
Traditionally, the frontend is "the visible part" and the database is "the storage room." But something is missing between them: a coordinator.
|
||||
|
||||
If you imagine the application as a restaurant:
|
||||
|
||||
- The **frontend (client)** is the menu and ordering table, where customers browse and make requests.
|
||||
- The **database (Supabase, etc.)** is the kitchen storeroom, where ingredients and records are kept.
|
||||
- The **backend API** is the waiter. Customers should not run straight into the kitchen to grab ingredients. Instead, they tell the waiter what they want through an HTTP request. The waiter checks the request, verifies permissions, talks to the kitchen, and brings the result back through an HTTP response, usually in JSON.
|
||||
|
||||
Through APIs, we achieve a clean **frontend-backend separation**: the frontend focuses on rendering, while the backend focuses on business logic, data processing, and security.
|
||||
|
||||
---
|
||||
|
||||
# 2. Project architecture and initialization
|
||||
|
||||
A clear project skeleton is a prerequisite for getting high-quality code from an LLM. Before you ask AI to write code, you should already have a mental model of the structure you want.
|
||||
|
||||
## 2.1 A common API project structure
|
||||
|
||||
Even if an LLM is generating the code, you should not dump everything into one `server.js` file. A maintainable Node.js backend usually looks something like this:
|
||||
|
||||
```text
|
||||
my-api-project/
|
||||
├── .env # Sensitive environment variables such as API keys and DB URLs
|
||||
├── server.js # Project entry point: boot server, register global middleware
|
||||
├── package.json # Dependency management
|
||||
├── src/
|
||||
│ ├── routes/ # Route layer: define URLs and HTTP methods
|
||||
│ ├── controllers/ # Controller layer: process request params, call services, return responses
|
||||
│ ├── services/ # Service layer: database access and core business logic
|
||||
│ └── middlewares/ # Middleware: auth, global error handling
|
||||
└── docs/ # API documentation
|
||||
```
|
||||
|
||||
## 2.2 Use AI to initialize the project
|
||||
|
||||
Instead of manually running `npm init` and installing packages one by one, you can give the model the structure above in prompt form:
|
||||
|
||||
> 🗣️ **Prompt example**
|
||||
> "Help me scaffold a Node.js backend project that can connect to Supabase. Keep the structure clean and easy to maintain later."
|
||||
|
||||
If the prompt is good, the code you get back can already give you a backend app with a solid foundation running on `localhost:3000`.
|
||||
|
||||
---
|
||||
|
||||
# 3. Core practice: using LLMs to develop APIs
|
||||
|
||||
This is the heart of the chapter. When LLM-generated code feels superficial or unsafe, the root cause is usually missing context. **LLMs are not afraid of complex requirements. They are afraid of vague ones.**
|
||||
|
||||
Take the `menu_items` insert API from the [database chapter](../2.2-database-supabase/) as an example.
|
||||
|
||||
## 3.1 Give the model full context
|
||||
|
||||
Before asking the model to write an API, provide both the **database schema** and the **business constraints**.
|
||||
|
||||
> 🗣️ **High-quality prompt template**
|
||||
> "Help me write an API for creating a menu item. Each item includes a product name, price, category (burger, snack, drink), and whether it is listed. Product name and price are required. Price cannot be negative. Return helpful validation errors when the user input is invalid."
|
||||
|
||||
## 3.2 Review the generated code
|
||||
|
||||
A good model will often separate responsibilities clearly, for example:
|
||||
|
||||
```javascript
|
||||
// services/menuService.js
|
||||
const { createClient } = require('@supabase/supabase-js');
|
||||
const supabase = createClient(process.env.SUPABASE_URL, process.env.SUPABASE_KEY);
|
||||
|
||||
exports.createMenuItem = async (menuData) => {
|
||||
// Push data into the table via the Supabase SDK
|
||||
const { data, error } = await supabase
|
||||
.from('menu_items')
|
||||
.insert([menuData])
|
||||
.select();
|
||||
|
||||
if (error) throw new Error(`Database insert failed: ${error.message}`);
|
||||
return data[0];
|
||||
};
|
||||
```
|
||||
|
||||
You can see that, with enough context, the model generates something structurally cleaner: Supabase initialization is separated, errors are handled, and the code is easier to reason about. That is very different from the spaghetti code you usually get from a vague request like "write a create endpoint."
|
||||
|
||||
---
|
||||
|
||||
# 4. Free your hands: generate API documentation automatically
|
||||
|
||||
For a development team, an undocumented API is a black box. Frontend engineers cannot guess what parameters are required or what the response shape will be. The most common API description standard in the industry is **OpenAPI** (formerly known as the Swagger Specification).
|
||||
|
||||
Writing Swagger YAML or JSON by hand used to be painful and error-prone. Now it is one of the areas where LLMs help the most.
|
||||
|
||||
You can select your `routes` and `controllers` code and ask:
|
||||
|
||||
> 🗣️ **Documentation prompt**
|
||||
> "Generate API documentation from the code above. Clearly explain what every parameter means and what data the endpoint returns, so the frontend team can integrate it easily."
|
||||
|
||||
You can even ask the model to fill in descriptions and mock example values such as `price_cents: 1200` for a $12.00 item. That reduces a lot of back-and-forth communication.
|
||||
|
||||
---
|
||||
|
||||
# 5. Safeguards: generate tests and Postman collections
|
||||
|
||||
After the code and docs are ready, there is still one more step: verifying that everything actually works.
|
||||
|
||||
## 5.1 Generate Postman or Apifox test configurations
|
||||
|
||||
When developing APIs, we often use tools like Postman to simulate HTTP requests. Without AI, you usually have to fill in URLs, headers, and JSON request bodies manually.
|
||||
|
||||
You can simply tell the model:
|
||||
|
||||
> "Convert this API documentation into a Postman-importable format and include both successful and failing request examples."
|
||||
|
||||
Once you save the returned JSON as something like `menu_api.json` and import it into Postman, you instantly get a ready-to-use testing panel.
|
||||
|
||||
## 5.2 Write automated unit tests
|
||||
|
||||
If you want stricter engineering quality, you can also ask the model to write tests with `Jest` or a similar framework. That is especially useful for boundary conditions, such as ensuring a negative price is rejected before data reaches the database.
|
||||
|
||||
---
|
||||
|
||||
# 6. Backend API best practices you still need to know
|
||||
|
||||
Even with AI support, you are still the gatekeeper of the system. You need to review the generated code against a few important principles:
|
||||
|
||||
1. **RESTful path naming**
|
||||
- Good: `GET /api/users` for listing users, `POST /api/users` for creating users
|
||||
- Bad: `POST /api/getUser` or `POST /api/createUser`
|
||||
The URL should represent the resource. The action belongs to the HTTP method.
|
||||
|
||||
2. **Correct HTTP status codes**
|
||||
- `200/201`: request succeeded / resource created successfully
|
||||
- `400`: bad request, invalid parameters or missing required fields
|
||||
- `401/403`: unauthorized / forbidden
|
||||
- `404`: resource not found
|
||||
- `500`: server error, such as backend exceptions or database failures
|
||||
Do not expose full backend stack traces to the frontend.
|
||||
|
||||
3. **Never trust user input**
|
||||
Frontend input can be forged. All important validation must run again on the backend.
|
||||
|
||||
# 7. Summary
|
||||
|
||||
After this chapter, your role should start to feel different. You are no longer just a typist trapped in syntax and punctuation. You are becoming a **system designer and architecture coordinator**.
|
||||
|
||||
You have now learned:
|
||||
|
||||
1. The core systems thinking behind **APIs and frontend-backend separation**
|
||||
2. How to dramatically improve LLM-generated backend code by providing **good context and layered structure**
|
||||
3. How to turn tedious **documentation writing** and **test creation** into automation tasks that AI handles well
|
||||
4. How to combine this with what you already learned about **Supabase** to complete the full flow from frontend request to database update
|
||||
|
||||
::: tip Next Step
|
||||
Once your data flow and backend service are ready, they still only run locally on your own machine. In the next chapter, we will learn how to **deploy** that service to a public server so your product can be accessed by real users.
|
||||
:::
|
||||
@@ -0,0 +1,255 @@
|
||||
# Git and GitHub Workflow
|
||||
|
||||
In previous chapters, we learned how to use web-based vibe coding tools to write code. Each conversation could generate a new version of the code. But that raises an important question: if we want to return to an earlier version, is there a convenient way to do it? Is there a tool that can record our code at different stages so we can switch between versions freely?
|
||||
|
||||
That is exactly why version control software exists. In this chapter, we will introduce the most famous version control system, **Git**, and the most popular code hosting platform, **GitHub**. You will learn how to manage code with Git, how to download code from GitHub, how to upload your own work, and how to collaborate with others on larger projects.
|
||||
|
||||
Whether you are tracking changes in a personal project, synchronizing code with teammates, or contributing to open source, Git and GitHub are essential tools for modern developers. Once you understand them, you can manage code more confidently, create checkpoints whenever needed, move between different stages of a project, and keep every change traceable.
|
||||
|
||||
> 💡 **Prerequisites**
|
||||
>
|
||||
> Before learning Git, it helps to understand:
|
||||
> - [What Is the Terminal / Command Line](/en/appendix/2-development-tools/command-line-shell)
|
||||
> - [What Is Git](/en/appendix/2-development-tools/git-version-control)
|
||||
>
|
||||
> This chapter focuses on the GitHub workflow and hands-on usage, while the links above cover the core fundamentals.
|
||||
|
||||
# Quick start with Git
|
||||
|
||||
Before using Git, make sure you already understand the basics of the command line and Git itself. This chapter assumes you have that foundation and moves directly into installation, configuration, and practical GitHub collaboration.
|
||||
|
||||
## How to install Git
|
||||
|
||||
We will briefly walk through installation on the three major operating-system families.
|
||||
|
||||
### Windows
|
||||
|
||||
1. Go to the [official Git download page](https://git-scm.com/download/win) and download the installer that matches your system. In most cases, the x64 installer is recommended.
|
||||
2. Double-click the installer and follow the setup wizard:
|
||||

|
||||
1. In most cases, keeping the default settings is fine. If you customize them, pay attention to:
|
||||
- **Default editor**: you can keep Vim, or choose Visual Studio Code if you already have it installed.
|
||||

|
||||
- **How Git is used from the command line**: a practical default is the option that adds Git to the command line and third-party software without overcomplicating the system setup.
|
||||

|
||||
3. After installation, right-click on the desktop. If you see `Git Bash Here`, the installation succeeded.
|
||||
|
||||

|
||||
|
||||
### macOS
|
||||
|
||||
On macOS, you can first run `git --version` in Terminal to check whether Git is already installed. If it is not, macOS often prompts you to install the developer tools automatically.
|
||||
|
||||
1. Method 1: install with Homebrew
|
||||
If you have [Homebrew](https://brew.sh/), open Terminal and run `brew install git`
|
||||
2. Method 2: install Xcode tools
|
||||
You can also install Xcode or the Xcode Command Line Tools from Apple. Git is included as part of that toolchain.
|
||||
|
||||
### Linux
|
||||
|
||||
Most Linux distributions install Git through the system package manager:
|
||||
|
||||
- Ubuntu / Debian:
|
||||
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install git
|
||||
```
|
||||
|
||||
- CentOS / RHEL:
|
||||
|
||||
```bash
|
||||
sudo yum install git
|
||||
```
|
||||
|
||||
To verify the installation, run `git --version`. If a version number appears, Git is ready.
|
||||
|
||||
## Initialize Git identity
|
||||
|
||||
After installing Git, the first thing you should do is configure your user information. Run the following commands in the terminal and replace the values with your own:
|
||||
|
||||
```bash
|
||||
# Set the global username shown in commit history
|
||||
git config --global user.name "Your Name"
|
||||
|
||||
# Set the global email, ideally the same one you use on GitHub
|
||||
git config --global user.email "your.email@example.com"
|
||||
```
|
||||
|
||||
Git writes this information into every commit as the author identity. When you inspect the version history, you can clearly see who changed what and communicate more easily in collaborative projects.
|
||||
|
||||
You can confirm the configuration with:
|
||||
|
||||
```bash
|
||||
git config --list
|
||||
```
|
||||
|
||||
# What is GitHub?
|
||||
|
||||
GitHub is a code hosting platform built on top of Git. It provides remote storage for Git repositories and adds collaboration tools such as Issues, Pull Requests, and Projects. In simple terms, Git is the local version-control tool, while GitHub is the remote code warehouse and collaboration layer.
|
||||
|
||||
GitHub is also the world's largest and most influential open-source community. The idea of open source is that anyone can download and run the source code of a project. That allows people around the world to inspect each other's work, improve it, and build new things on top of it.
|
||||
|
||||

|
||||
|
||||
Large companies often open-source tools and tutorials on GitHub as part of their technical strategy. In the GitHub ecosystem, the number of `stars` a project receives is one of the most visible indicators of trust and influence.
|
||||
|
||||

|
||||
|
||||
In this course, many supporting resources and assignments are also published in GitHub repositories. By learning to upload your own work there, you gradually build the workflow you will use for real application development later.
|
||||
|
||||
## Create a GitHub account
|
||||
|
||||
1. Visit [GitHub](https://github.com/) and click `Sign up` in the top-right corner.
|
||||

|
||||
2. Enter your email address, create a password, and complete the verification steps.
|
||||
3. Confirm your email, and your account is ready.
|
||||
|
||||
## Create your first repository on GitHub
|
||||
|
||||
Next, let's create your first repository, often shortened to `repo`.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
When creating a repository, the main fields mean:
|
||||
|
||||
1. **Repository name**: the public-facing name of the repository
|
||||
2. **Description**: a short explanation of what the repository is for
|
||||
3. **Visibility**:
|
||||
- `Private`: only you and people you explicitly invite can see it
|
||||
- `Public`: anyone can see it
|
||||
4. **README**: it is good practice to add a README. Think of it as the repository's introduction and usage guide.
|
||||
5. **.gitignore and license**:
|
||||
1. `.gitignore` tells Git which files or folders should not be tracked, such as temporary files, dependency folders, or local secrets.
|
||||
2. `license` determines how others are allowed to use your open-source code.
|
||||
|
||||
For your first repository, it is reasonable to check `Add README`, set the visibility to `Private`, and fill in a name and description you like. Then click `Create repository`.
|
||||
|
||||

|
||||
|
||||
You will now have a clean repository, ready for your files.
|
||||
|
||||

|
||||
|
||||
To download a repository, you use `git clone`, which requires the repository URL. You can find that by clicking the green `Code` button. GitHub usually shows both HTTPS and SSH options.
|
||||
|
||||

|
||||
|
||||
In general, HTTPS is fine for temporary downloads or quick testing, but for your own daily development workflow, SSH is usually the better experience.
|
||||
|
||||
## Bind local SSH to GitHub
|
||||
|
||||
In GitHub, "binding SSH" means connecting your local machine's SSH public key to your GitHub account so GitHub can recognize your device through the SSH protocol. Once set up, you can `clone`, `pull`, and `push` securely without re-entering passwords every time.
|
||||
|
||||
In plain language: it is like giving your device a special access card for GitHub.
|
||||
|
||||
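> 💡 What is SSH? SSH (Secure Shell) is a network protocol for securely connecting to a remote machine. It can prove your identity with a key pair instead of a password.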
|
||||
|
||||
### Why use SSH authentication?
|
||||
|
||||
GitHub supports two major protocols for repository operations:
|
||||
|
||||
- **HTTPS**: requires a Personal Access Token (or a credential helper that stores one) for pushes; GitHub no longer accepts account passwords for Git operations
|
||||
- **SSH**: uses a key pair, so you do not need to repeat authentication constantly
|
||||
|
||||
SSH binding is the prerequisite for using GitHub with SSH. You must upload your local SSH public key to GitHub so GitHub can verify your machine.
|
||||
|
||||
### The core logic: SSH key pairs
|
||||
|
||||
SSH authentication depends on a key pair:
|
||||
|
||||
1. **Private key**: stored on your local machine, never shared
|
||||
2. **Public key**: uploaded to GitHub
|
||||
|
||||
When you perform a Git operation over SSH:
|
||||
|
||||
- Your machine signs the request with the private key
|
||||
- GitHub checks it against the public key you uploaded
|
||||
- If the match succeeds, the operation is allowed
|
||||
|
||||
### The actual steps
|
||||
|
||||
The core workflow is simple: **generate a key pair → upload the public key to GitHub**.
|
||||
|
||||
1. **Generate an SSH key pair locally**
|
||||
1. **Use Trae to help generate it**
|
||||
Prompt:
|
||||
`Help me create the SSH key needed for GitHub login. My email is your_email@gmail.com. Please return the public key for me to copy.`
|
||||
|
||||

|
||||
|
||||
After entering the prompt, you may still need to press `Enter` in the terminal pane so the command can continue. Once Trae finishes, it will show you the public key to copy.
|
||||
|
||||

|
||||
|
||||
2. **Generate it manually**
|
||||
Open your terminal and run `ssh-keygen -t ed25519 -C "your_email@example.com"`
|
||||
Press `Enter` to accept the defaults unless you want a custom path or passphrase. This creates:
|
||||
|
||||
- `id_ed25519`: your private key, which must stay local
|
||||
- `id_ed25519.pub`: your public key, which you will upload to GitHub
|
||||
|
||||
2. **Upload the public key to GitHub**
|
||||
|
||||
This is the binding step itself.
|
||||
|
||||
1. Copy the public key:
|
||||
- On Windows, open `C:\Users\<your-username>\.ssh\id_ed25519.pub` in a text editor
|
||||
- On macOS/Linux, run `cat ~/.ssh/id_ed25519.pub`
|
||||
2. In GitHub, go to your avatar → `Settings` → `SSH and GPG keys` → `New SSH key`
|
||||

|
||||
3. Enter a title and paste the public key.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
3. **Verify the binding**
|
||||
|
||||
Run `ssh -T git@github.com`
|
||||
|
||||
If you see a message similar to `Hi [your GitHub username]! You've successfully authenticated...`, the setup worked.
|
||||
|
||||
### Important notes
|
||||
|
||||
- If you use multiple devices, create a separate SSH key pair for each one and upload each public key to the same GitHub account.
|
||||
- Never share your private key.
|
||||
- After setting up SSH, use SSH repository URLs such as `git@github.com:username/repository.git`, not HTTPS URLs.
|
||||
- If you cloned a repository over HTTPS earlier, you can switch it with `git remote set-url origin <new-ssh-url>`
|
||||
|
||||
# Use Trae for GitHub operations
|
||||
|
||||
Now that we have covered Git, GitHub, SSH, and the setup process, you can start asking Trae to help with Git operations.
|
||||
|
||||
## `git clone`: download an existing repository
|
||||
|
||||
You can directly tell Trae which repository URL you want to clone.
|
||||
|
||||

|
||||
|
||||
## `git pull`: fetch the latest remote updates
|
||||
|
||||
Before editing, especially in a shared repository, you should pull the latest changes first.
|
||||
|
||||
**Always include the folder name and its relative or absolute path so you do not pull in the wrong repository by mistake.**
|
||||
|
||||
Prompt:
|
||||
`Help me pull this repository AIID-TEST in ./AIID-TEST.`
|
||||
|
||||
## `git commit` and `git push`: stage, save, and upload your updates
|
||||
|
||||
After you modify files locally, you can ask Trae to detect the changes and help you push them to GitHub.
|
||||
|
||||
Prompt:
|
||||
`I finished. Commit and push to the repository AIID-TEST in ./AIID-TEST.`
|
||||
|
||||

|
||||
|
||||
If the push succeeds, you will be able to see the updated content on GitHub immediately.
|
||||
|
||||
# References
|
||||
|
||||
- Pro Git book: https://git-scm.com/book/en/v2
|
||||
- GitHub Docs: https://docs.github.com/en
|
||||
@@ -0,0 +1,517 @@
|
||||
# How to Deploy Web Applications
|
||||
|
||||
In this tutorial, we will walk through how to deploy your web application to the internet so other people can access it. We will introduce four common deployment platforms: **Tencent Cloud CloudBase**, **Vercel**, **Netlify**, and **Zeabur**. The goal is to help you go from "I finished writing the code" to "other people can visit my site online."
|
||||
|
||||
# What does "deployment" mean?
|
||||
|
||||
Before we begin, let's clarify what deployment actually is.
|
||||
|
||||
For any website to be visited by external users, it must have a publicly reachable network address. That can be an IP address such as `123.45.67.89`, or a domain such as [google.com](https://google.com/). But the address alone is not enough. Your code, such as HTML, CSS, JavaScript, or React/Vue projects, as well as images and video assets, must live on a server that stays online 24/7 and can answer incoming requests.
|
||||
|
||||

|
||||
|
||||
Image source: https://www.hostinger.com/tutorials/what-is-cloud-hosting
|
||||
|
||||
The full process of uploading resources, configuring the runtime environment, and making the service run is called **deployment**.
|
||||
|
||||
In simple terms: if your website runs only on your own computer, then only you can visit it locally because the files only exist on your hard drive. Deployment means moving your code and assets to a public-facing server, configuring that server properly, and making sure it knows how to respond when someone visits your domain.
|
||||
|
||||
If you deploy everything manually, a project usually involves many steps:
|
||||
|
||||
1. **Prepare a server**
|
||||
You first need to buy or rent a cloud server from a provider such as Alibaba Cloud, Tencent Cloud, or AWS EC2. Then you choose its region, CPU, memory, and storage, and learn how to connect to it remotely, often through SSH.
|
||||

|
||||
|
||||
2. **Configure the runtime environment**
|
||||
Web apps only run under the correct environment. A Node.js project needs Node installed. A Python project needs Python and its dependencies. If the versions do not match, the app may fail to start.
|
||||
|
||||
3. **Upload your files**
|
||||
You need to move your local code and assets to the server, often via Git or file-transfer tools. Large projects can make this step frustrating if uploads break halfway through.
|
||||
|
||||

|
||||
|
||||
4. **Start the service and test it**
|
||||
After upload, you need to start the app and check whether the assigned address works. If not, the problem may be a firewall-blocked port, or it may be an application bug. In that case, you need to inspect logs.
|
||||
|
||||
5. **Maintain and update**
|
||||
Every code update usually means another upload and restart. If the server crashes, you may need to restart services manually or configure a process manager to keep them alive.
|
||||
|
||||
Platforms such as CloudBase, Vercel, Netlify, and Zeabur exist to eliminate much of that complexity. They automate the boring parts:
|
||||
|
||||
- buying and provisioning servers
|
||||
- configuring runtimes
|
||||
- pulling code
|
||||
- starting services
|
||||
- monitoring uptime
|
||||
|
||||
In many cases, you just connect a GitHub repository or upload your code, and the platform does the rest.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
# Deployment platform comparison
|
||||
|
||||
| Platform | Main strengths | Best for | Free tier |
|
||||
|------|------|----------|----------|
|
||||
| **Tencent Cloud CloudBase** | Fast access within mainland China, strong WeChat ecosystem integration | China-focused users, WeChat Mini Program support | Yes |
|
||||
| **Vercel** | Excellent support for frontend frameworks, tight GitHub integration | Modern React/Vue/Next.js frontend projects | Yes |
|
||||
| **Netlify** | Broad feature set, great Git workflow, form handling, auth support | Static sites that also need forms or auth | Yes |
|
||||
| **Zeabur** | Flexible service combinations and many templates | More complex projects, including tools like Dify and n8n | About $5/month in free quota |
|
||||
|
||||
---
|
||||
|
||||
# 1. Tencent Cloud CloudBase
|
||||
|
||||
Tencent Cloud CloudBase is Tencent's integrated cloud backend platform and is especially friendly for developers targeting domestic Chinese users.
|
||||
|
||||
Its advantages include:
|
||||
|
||||
- **Fast domestic access**
|
||||
- **WeChat ecosystem integration**
|
||||
- **An all-in-one backend solution** including static hosting, cloud functions, databases, and storage
|
||||
- **A practical free tier**
|
||||
|
||||
## Deploy a web app with CloudBase
|
||||
|
||||
### Step 1: Register and log in
|
||||
|
||||
Visit the [Tencent Cloud CloudBase Console](https://console.cloud.tencent.com/tcb) and log in with WeChat or QQ.
|
||||
|
||||
### Step 2: Create an environment
|
||||
|
||||
Click `Create Environment` and choose an environment name such as `my-web-app`.
|
||||
|
||||
> ⚠️ **Note**: the free trial version of CloudBase often requires a redemption code. You usually need to follow the CloudBase official account and obtain a code there.
|
||||
|
||||
### Step 3: Enable static website hosting
|
||||
|
||||
Inside the environment management screen, enable the `Static Website Hosting` feature. Once enabled, you will receive a default public domain.
|
||||
|
||||
CloudBase supports several deployment methods:
|
||||
|
||||
- upload a local build output
|
||||
- deploy from a template
|
||||
- deploy from a Git repository
|
||||
|
||||
### Step 4: Deploy your code
|
||||
|
||||
CloudBase offers three main workflows:
|
||||
|
||||
**Option 1: upload a local project**
|
||||
|
||||
- choose `Local Project Deployment`
|
||||
- upload your built static files such as HTML, CSS, and JS
|
||||
- typically upload a `dist` or `build` directory
|
||||
|
||||
**Option 2: use a template**
|
||||
|
||||
- start from a preset project template
|
||||
- common options include React and Vue starter templates
|
||||
|
||||
**Option 3: deploy from Git**
|
||||
|
||||
- connect a GitHub repository
|
||||
- set the build command, such as `npm run build`
|
||||
- every push can trigger an automatic redeploy
|
||||
|
||||
> 💡 **Tip**: you can also deploy from the command line:
|
||||
>
|
||||
> ```bash
|
||||
> # Install CloudBase CLI
|
||||
> npm install -g @cloudbase/cli
|
||||
> # Log in
|
||||
> tcb login
|
||||
> # Deploy
|
||||
> tcb hosting deploy ./dist -e your-env-id
|
||||
> ```
|
||||
|
||||
### Step 5: Add a custom domain (optional)
|
||||
|
||||
CloudBase also supports binding your own domain and applying a free HTTPS certificate to it.
|
||||
|
||||
---
|
||||
|
||||
# 2. Vercel
|
||||
|
||||
Vercel is one of the most popular frontend deployment platforms in the world and is especially good for React, Vue, and Next.js projects.
|
||||
|
||||
Its main strengths:
|
||||
|
||||
- **Deep GitHub integration**
|
||||
- **Automatic preview deployments for pull requests**
|
||||
- **Global CDN distribution**
|
||||
- **Support for serverless functions**
|
||||
|
||||
> ⚠️ **Note**: in some mainland-China network environments, Vercel may be less stable than domestic options such as CloudBase.
|
||||
|
||||
## Deploy a web app with Vercel
|
||||
|
||||
### Step 1: Register
|
||||
|
||||
Visit [Vercel](https://vercel.com) and sign in with GitHub.
|
||||
|
||||
### Step 2: Import a project
|
||||
|
||||
1. Click `Add New Project`
|
||||
2. Select the GitHub repository you want to deploy
|
||||
3. If needed, adjust GitHub app permissions
|
||||
|
||||
### Step 3: Configure build settings
|
||||
|
||||
Vercel often detects the framework automatically:
|
||||
|
||||
| Framework | Build command | Output directory |
|
||||
|------|----------|----------|
|
||||
| React | `npm run build` | `build` |
|
||||
| Vue | `npm run build` | `dist` |
|
||||
| Next.js | `next build` | - |
|
||||
| Plain HTML | - | project root |
|
||||
|
||||
If detection fails, configure it manually:
|
||||
|
||||
- **Build Command**
|
||||
- **Output Directory**
|
||||
- **Install Command**
|
||||
|
||||
### Step 4: Deploy
|
||||
|
||||
Click `Deploy` and wait for the build to complete. A successful project receives a `xxx.vercel.app` domain.
|
||||
|
||||
### Step 5: Add a custom domain (optional)
|
||||
|
||||
Use the `Domains` section in project settings to bind your own domain. HTTPS is handled automatically.
|
||||
|
||||
---
|
||||
|
||||
# 3. Netlify
|
||||
|
||||
Netlify is another strong frontend deployment platform, especially for static sites and single-page applications.
|
||||
|
||||
Its strengths:
|
||||
|
||||
- **Feature-rich hosting**, including form handling, auth, and edge/serverless functions
|
||||
- **Strong Git integration**
|
||||
- **Preview links for branches**
|
||||
- **Global CDN**
|
||||
- **Built-in form handling**
|
||||
- **Built-in user authentication tools**
|
||||
|
||||
> ⚠️ **Note**: Netlify may not be as fast as CloudBase for domestic Chinese users.
|
||||
|
||||
## Deploy a web app with Netlify
|
||||
|
||||
### Step 1: Register
|
||||
|
||||
Visit [Netlify](https://www.netlify.com) and sign up with GitHub, GitLab, Bitbucket, or email.
|
||||
|
||||
### Step 2: Import a project
|
||||
|
||||
1. Click `Add new site` → `Import an existing project`
|
||||
2. Choose your Git provider
|
||||
3. Authorize Netlify
|
||||
4. Select the repository
|
||||
|
||||
### Step 3: Configure build settings
|
||||
|
||||
| Framework | Build command | Publish directory |
|
||||
|------|----------|----------|
|
||||
| React | `npm run build` | `build` |
|
||||
| Vue | `npm run build` | `dist` |
|
||||
| Angular | `ng build` | `dist/<project-name>` |
|
||||
| Next.js | `next build` | `.next` (`out` only for a static export) |
|
||||
| Plain HTML | - | `.` |
|
||||
|
||||
### Step 4: Deploy
|
||||
|
||||
Click `Deploy site`. Once it succeeds, you will receive a `xxx.netlify.app` domain.
|
||||
|
||||
### Step 5: Add a custom domain (optional)
|
||||
|
||||
1. Open the site settings
|
||||
2. Go to `Domain management`
|
||||
3. Add your custom domain
|
||||
4. Follow the DNS instructions
|
||||
|
||||
### Useful Netlify features
|
||||
|
||||
#### 1. Form handling
|
||||
|
||||
Netlify can capture form submissions without requiring a dedicated backend.
|
||||
|
||||
```html
|
||||
<form name="contact" netlify>
|
||||
<p>
|
||||
<label>Name: <input type="text" name="name" /></label>
|
||||
</p>
|
||||
<p>
|
||||
<label>Email: <input type="email" name="email" /></label>
|
||||
</p>
|
||||
<p>
|
||||
<label>Message: <textarea name="message"></textarea></label>
|
||||
</p>
|
||||
<p>
|
||||
<button type="submit">Send</button>
|
||||
</p>
|
||||
</form>
|
||||
```
|
||||
|
||||
After deployment, Netlify automatically stores submission data and can forward it to email or other services.
|
||||
|
||||
#### 2. Netlify Functions
|
||||
|
||||
Netlify also supports serverless functions, which are useful for small APIs without maintaining a full backend.
|
||||
|
||||
For example:
|
||||
|
||||
```javascript
|
||||
exports.handler = async (event, context) => {
|
||||
return {
|
||||
statusCode: 200,
|
||||
body: JSON.stringify({ message: "Hello from Netlify!" })
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
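After deployment, the function name comes from its file name. For example, if the code above is saved as `netlify/functions/hello.js`, the function is accessible at: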
|
||||
|
||||
`https://your-domain/.netlify/functions/hello`
|
||||
|
||||
#### 3. Local development support
|
||||
|
||||
Netlify provides a CLI:
|
||||
|
||||
```bash
|
||||
# Install Netlify CLI
|
||||
npm install -g netlify-cli
|
||||
|
||||
# Log in
|
||||
netlify login
|
||||
|
||||
# Start local development
|
||||
netlify dev
|
||||
|
||||
# Test functions locally
|
||||
netlify functions:serve
|
||||
```
|
||||
|
||||
This lets you simulate Netlify forms and function behavior locally before deploying.
|
||||
|
||||
---
|
||||
|
||||
# 4. Zeabur
|
||||
|
||||
Zeabur is a newer deployment platform that is especially useful for more complex projects involving multiple services.
|
||||
|
||||
Its main strengths:
|
||||
|
||||
- **Many built-in service templates**
|
||||
- **Support for multiple deployment methods**
|
||||
- **Flexible multi-service composition**
|
||||
- **Usage-based billing**
|
||||
|
||||
## Deploy Dify with Zeabur
|
||||
|
||||
In earlier chapters, we already touched on Dify briefly. Now we can launch a full Dify service through [Zeabur](https://zeabur.com/projects) very easily.
|
||||
|
||||
First, open the [console page](https://zeabur.com/projects):
|
||||
|
||||

|
||||
|
||||
In that interface, you will see a set of service blocks. At the top are options such as `Agent`, `Servers`, `Docs`, and `Templates`:
|
||||
|
||||
1. **Agent**: Zeabur's built-in assistant for operational questions
|
||||
2. **Servers**: add or buy cloud servers
|
||||
3. **Docs**: official documentation
|
||||
4. **Templates**: built-in application templates
|
||||
|
||||
> An **image** can be understood as a packaged runtime environment + application state. If a service has already been configured successfully on one machine, it can be packed into an image and reused elsewhere.
|
||||
|
||||
In the upper-right corner, you can also see your balance. By default, Zeabur usually gives you a small monthly free quota, roughly around 5 USD worth of usage.
|
||||
|
||||

|
||||
|
||||
You can click the balance to inspect daily usage:
|
||||
|
||||

|
||||
|
||||
Now let's create a Dify service.
|
||||
|
||||
Start by clicking `New Project` on the [console homepage](https://zeabur.com/projects):
|
||||
|
||||

|
||||
|
||||
Zeabur supports several ways to create a service:
|
||||
|
||||
1. **GitHub**
|
||||
Connect your GitHub account and deploy directly from a repository.
|
||||
2. **Template**
|
||||
Start from a built-in app template such as Dify or n8n.
|
||||

|
||||
3. **Databases**
|
||||
Deploy databases such as MySQL or MongoDB.
|
||||

|
||||
4. **Functions**
|
||||
Deploy JavaScript or Python functions.
|
||||

|
||||

|
||||
5. **Local Project**
|
||||
Upload a local folder and let Zeabur detect how to run it.
|
||||

|
||||
6. **Docker Image**
|
||||
Deploy from an already built Docker image.
|
||||

|
||||
7. **Cursor**
|
||||
Deploy directly from a project you are editing in Cursor.
|
||||
|
||||
If you want to deploy Dify, the easiest path is **Template**. Search for `dify`, choose a version you like, and continue.
|
||||
|
||||

|
||||
|
||||
Then choose any project name. Zeabur will generate a temporary domain based on that name.
|
||||
|
||||

|
||||
|
||||
After creation, you will see multiple services starting one after another. Dify is not a single program, but rather a group of coordinated services, so you need to wait until they are all running.
|
||||
|
||||
In many setups, you can click the main Dify app to get the access address. In this example, however, the final entry point is exposed through `nginx`, so you need to open the `nginx` service and find the public service address there.
|
||||
|
||||

|
||||
|
||||
After waiting a bit, you should see the Dify login screen. Register an account with your email and password, and your own Dify service is ready.
|
||||
|
||||

|
||||
|
||||
You can also launch `n8n` in a similar way if you want another AI workflow tool:
|
||||
|
||||

|
||||
|
||||
## Deploy a Snake game with Zeabur and Trae
|
||||
|
||||
To explore how Zeabur deploys your own code rather than a ready-made template, let's start with something simple: a Snake game generated with Trae.
|
||||
|
||||
### Deploy an HTML-based version
|
||||
|
||||

|
||||
|
||||
Trae can generate a browser-based Snake game from plain HTML very easily. Once the project is created locally, you can upload the whole folder to Zeabur using the local-project deployment method described above.
|
||||
|
||||

|
||||
|
||||
After deployment, you will enter the service details page:
|
||||
|
||||

|
||||
|
||||
Click `Network` on the left, find `Public Address`, and click `Generate Domain` to create a public URL.
|
||||
|
||||

|
||||

|
||||
|
||||
Once that address is generated, opening it in the browser will let you play your Snake game publicly:
|
||||
|
||||

|
||||
|
||||
This same method works well for other static HTML-based web apps too.
|
||||
|
||||
### Deploy a React version
|
||||
|
||||
Now let's deploy a React app instead of a plain HTML app. Compared with static HTML, React is a more modern and component-based frontend framework, and it is common in production applications.
|
||||
|
||||

|
||||
|
||||
#### Refactor into a React architecture
|
||||
|
||||
In Trae, you can simply say:
|
||||
|
||||
`Help me refactor this code into a React architecture.`
|
||||
|
||||

|
||||
|
||||
However, React apps are a bit more demanding to deploy because they rely on a build toolchain and a more structured project layout.
|
||||
|
||||
One especially important issue is the **port**. A local React development server often listens on port `3000` by default. Zeabur, however, expects the deployed app to listen on port `8080`.
|
||||
|
||||
If your React app still listens on `3000`, the deployment may fail because Zeabur cannot route traffic to it correctly.
|
||||
|
||||
#### What is a port?
|
||||
|
||||
You can think of the IP address as the building address and the port number as the room number. Together, `IP:port` points to a specific service.
|
||||
|
||||
Most websites do not explicitly show a port because browsers automatically assume the default ports:
|
||||
|
||||
- `80` for HTTP
|
||||
- `443` for HTTPS
|
||||
|
||||
But for app-specific services such as React development servers (`3000`) or Zeabur deployments (`8080`), the port becomes important.
|
||||
|
||||
#### What does "listening on a port" mean?
|
||||
|
||||
When a program listens on a port, it is telling the operating system:
|
||||
|
||||
`I am waiting here for incoming network requests. Send them to me.`
|
||||
|
||||
In the building analogy, the IP is the building address, and the port is the room number. The React dev server opens room `3000` and tells the building manager, "Any requests addressed to room 3000 should be delivered to me."
|
||||
|
||||
When you run `npm start` locally, React commonly chooses port `3000`. Zeabur, however, is designed to work with apps listening on `8080`, so you need to change the default.
|
||||
|
||||
#### Change the default listening port
|
||||
|
||||
The easiest way is simply to ask Trae:
|
||||
|
||||
`Please help me change the default port of this React project to 8080.`
|
||||
|
||||
Trae can modify the relevant configuration for you. After that, rebuild the project and upload it to Zeabur again.
|
||||
|
||||

|
||||

|
||||
|
||||
Once you configure the public network address just as you did for the HTML project, the React app can also be served successfully.
|
||||
|
||||

|
||||

|
||||
|
||||
The same idea applies to any other app that needs a port adjustment before deployment.
|
||||
|
||||
---
|
||||
|
||||
# ⚠️ How to pause or delete a Zeabur project
|
||||
|
||||
Because server resources cost money, you should always get in the habit of stopping services you are no longer using.
|
||||
|
||||
Open the project's `Settings`:
|
||||
|
||||

|
||||
|
||||
Scroll to the bottom, and you will see controls like the following:
|
||||
|
||||

|
||||
|
||||
You can:
|
||||
|
||||
- click `Suspend All Services` to pause everything and reduce cost
|
||||
- click `Restart All Services` to restart services if something is stuck
|
||||
- click `Delete Project` if you are sure you no longer need it
|
||||
|
||||
---
|
||||
|
||||
# Summary
|
||||
|
||||
In this tutorial, we introduced four common deployment platforms:
|
||||
|
||||
1. **Tencent Cloud CloudBase**: good for domestic Chinese users and strong WeChat integration
|
||||
2. **Vercel**: excellent for modern frontend frameworks and GitHub-driven workflows
|
||||
3. **Netlify**: strong for static sites that also need forms, auth, and other hosting features
|
||||
4. **Zeabur**: very useful for more complex projects with multiple services and templates
|
||||
|
||||
Which one you choose depends on your needs:
|
||||
|
||||
- For primarily domestic Chinese audiences, **CloudBase** is often the best first choice
|
||||
- For React, Next.js, and similar stacks, **Vercel** or **Netlify** are strong options
|
||||
- For static sites that also need forms or auth, **Netlify** is especially useful
|
||||
- For Dify, n8n, and other multi-service setups, **Zeabur** is often the easiest
|
||||
|
||||
No matter which platform you choose, the deployment workflow is conceptually similar:
|
||||
|
||||
**prepare the code → choose a platform → configure the build → deploy it**
|
||||
|
||||
Once you understand that loop, you can start publishing your own projects for the world to use.
|
||||
@@ -0,0 +1,702 @@
|
||||
# CLI AI Coding Tools
|
||||
|
||||
In this tutorial, we introduce AI coding agents that run directly in the command line. They are different from the agents we used earlier in Trae and Cursor. CLI AI coding tools can only be used in the terminal. Compared with agents integrated into AI IDEs, they usually have longer context windows, faster tool-calling speed, and compatibility with a wider range of large models. In the latest AI Vibe Coding practice, we often prioritize CLI AI coding tools over built-in IDE coding agents.
|
||||
|
||||
## Starting from the CLI
|
||||
|
||||
Do you still remember the CLI we introduced before? CLI means using pure text commands in a terminal or command prompt to operate software applications, instead of relying on a graphical user interface (GUI). You can simply think of a GUI as the clickable interface with buttons on a computer or phone, where you do not need to type commands.
|
||||
|
||||
> On Windows, common terminals include Command Prompt (`cmd`) and PowerShell. You can type `cmd` or `powershell` in the Run/Search box to launch them.
|
||||
|
||||

|
||||
|
||||
The CLI is naturally good for text-command workflows. Among a small group of geeks (programming enthusiasts pursuing extreme efficiency), CLI is even more popular than GUI. They want to complete everything with the keyboard and feel that moving the mouse can slow down coding efficiency.
|
||||
|
||||
In industry, CLI is also often the most common interface form, because GUI requires the operating system to draw interfaces and manage windows, which demands more computer resources. CLI only needs to pass received commands to the system for execution. So when connecting to large-scale server clusters, we usually interact only through CLI.
|
||||
|
||||

|
||||
|
||||
For many learners with no CLI experience, command-line operations can feel complicated, with too many commands, and even the fear of "accidentally breaking the computer." No need to worry. Remember how, in previous tutorials, we often asked Trae to help with basic operations? We can use exactly the same idea here. We can ask CLI coding tools to perform all CLI operations for us: entering specific folders, searching and processing files, running or copying open-source projects, and so on. The whole process can be completed through conversation with the CLI AI coding tool.
|
||||
|
||||
## How Is It Different from an AI IDE
|
||||
|
||||
We can compare CLI AI coding tools to z.ai and Trae that we used before. In a sense, CLI AI coding tools can be seen as a special kind of z.ai: they also only need a simple chat entry, and then they automatically perform the required operations (sometimes you just need to open a browser manually to check the final result). If compared to AI IDEs, CLI AI coding tools can be seen as the Agent module inside an IDE, which is the side chat panel.
|
||||
|
||||

|
||||
|
||||
However, because different AI IDEs implement agents in different ways, their capability gaps are large, and AI coding quality is often unstable. CLI AI coding tools are usually developed directly by major tech companies, such as Anthropic behind Claude and OpenAI behind ChatGPT.
|
||||
|
||||
Compared with other AI coding agents, directly using products from these major companies is often a better practice. Claude Code in particular is a tool used by Anthropic's own R&D teams, designed from the start around "meeting real engineer needs."
|
||||
|
||||
To compare more intuitively, we can look at the difference between Claude Code and one AI IDE agent (Cursor as an example):
|
||||
|
||||
| Feature | Claude Code | Cursor | Better Choice |
|
||||
| ------------------ | ----------------- | ------------------- | ------------- |
|
||||
| Automatic execution | ✅ Very strong | ❌ Limited | Claude Code |
|
||||
| IDE integration | ❌ CLI only | ✅ Native VS Code | Cursor |
|
||||
| Real-time completion | ❌ None | ✅ Excellent | Cursor |
|
||||
| Multi-file operations | ✅ Very strong | ⚠️ Pretty good | Claude Code |
|
||||
| GitHub integrated workflow | ✅ Can commit directly | ⚠️ More manual | Claude Code |
|
||||
| Learning cost | ⚠️ Medium | ✅ Easy to start | Cursor |
|
||||
| Context length | ✅ Very long | ⚠️ Good | Claude Code |
|
||||
| Debug assistance | ✅ Automated | ⚠️ More manual work | Claude Code |
|
||||
|
||||
Table source: https://northflank.com/blog/claude-code-vs-cursor-comparison
|
||||
|
||||
In short, CLI AI coding tools usually can:
|
||||
|
||||
- Support much longer continuous conversations (they can even "work for you all day").
|
||||
- Provide longer context windows (you no longer need to frequently say "continue").
|
||||
- Respond faster (with support for more custom model APIs).
|
||||
|
||||
For coding-related operations, they are usually smarter and more stable than most IDE built-in agents.
|
||||
|
||||
## Common CLI AI Coding Tools
|
||||
|
||||
Although there are many open-source implementations now, in practice we only recommend two major types of CLI AI coding tools as the "preferred combo." You can choose either one based on your habits, and we strongly recommend trying both before deciding which suits you best.
|
||||
|
||||
- Codex uses GPT-5 and is stronger overall in capability.
|
||||
- Claude Code, routed through GLM-4.6's Anthropic-compatible API, offers an experience close to Claude 4 at a lower cost.
|
||||
|
||||
However, which one works better in your real project can only be determined by hands-on testing. Mastering multiple AI coding tools is always beneficial. Once you are skilled, you can switch flexibly among Claude Code, Codex, or Trae in different scenarios. If one tool does not perform well after multiple tries, just switch to another tool or model and continue experimenting.
|
||||
|
||||
At the same time, because model versions update very quickly, we recommend prioritizing whichever option currently performs best in cost-performance (quality / cost).
|
||||
|
||||
### Claude Code
|
||||
|
||||
Claude Code is an AI coding tool developed by Anthropic based on Claude model capabilities. Its primary interaction happens in the terminal, and it can also be used as a VS Code extension. Similar to an agent inside an AI IDE, it can deeply understand a developer's repository and complete end-to-end development tasks through natural language instructions, including code editing, bug fixing, running and fixing tests, managing Git workflows (such as resolving merge conflicts and creating PRs), explaining complex code, and executing terminal commands.
|
||||
|
||||

|
||||
|
||||
Claude Code's main advantages are: very long context windows (it can handle whole files or even small projects), proactively clarifying ambiguous requirements, automatically planning and allocating execution tasks, and deeply understanding and explaining the entire codebase. Compared with ordinary IDE agents, it is better suited for immersive vibe-coding workflows.
|
||||
|
||||
In actual use, you can ask it through chat to create new projects, perform CLI operations (such as organizing folders, bulk renaming files, deploying open-source projects), and configure development environments (such as installing and debugging Python environments). If you find some code difficult to understand, or a folder structure unclear, you can directly ask Claude Code to generate structured analysis documentation or explain specific parts step by step.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
If you want to systematically learn Claude Code, you can refer to the course jointly launched by Andrew Ng and Anthropic:
|
||||
https://www.bilibili.com/video/BV176t2zSEpr
|
||||
|
||||
Next, we will learn how to use Claude Code. Because directly using the official Claude Code is often very expensive (as shown below), we will instead use API platforms that are compatible with the Claude Code protocol but are backed by other large models.
|
||||
|
||||

|
||||
|
||||
You need to learn the different options below (it is best to try all of them), and finally choose the one that suits you best as your main path.
|
||||
|
||||
The first approach is to directly use APIs that are "Anthropic-interface compatible." As Claude Code becomes more popular, more model providers now support Anthropic-style invocation. Common providers include GLM, Kimi, DeepSeek, and Siliconflow. They all provide compatible API interfaces. We will explain specific configuration details later.
|
||||
|
||||
One thing to note: Claude Code usually consumes a lot of tokens. If you are worried about high API costs, you can consider GLM monthly plans (about 20 RMB/month) to control cost. If you first want to estimate actual spending, you can also recharge 10 RMB for small-scale experiments.
|
||||
|
||||
Another approach is using the "Claude Code Router" project. It is an open-source tool that supports all common API invocation interfaces and allows fine-grained model configuration for different scenarios, including local model access. But this option is more complex to configure, so we suggest starting with the first approach.
|
||||
|
||||
#### Use Zhipu GLM as the Backend (Recommended)
|
||||
|
||||
GLM (General Language Model) is a series of large language models independently developed by Zhipu AI. GLM-4.6 is currently the latest version in the GLM family. Its core highlight is strong coding performance (it is benchmarked against Claude Sonnet 4 in public benchmarks and real tasks, and is considered top-tier domestically).
|
||||
|
||||

|
||||
|
||||
It also extends the context window to 200K, allowing easier handling of long text and large codebases, while strengthening reasoning and tool-calling capabilities, achieving a good balance between performance and cost.
|
||||
|
||||

|
||||
|
||||
Before connecting GLM, we first need to install Claude Code.
|
||||
|
||||
If command-line installation feels troublesome, or errors appear midway, you can directly ask Trae's Agent to complete installation for you.
|
||||
|
||||
```bash
|
||||
# Install Claude Code
|
||||
npm install -g @anthropic-ai/claude-code
|
||||
|
||||
# Enter your project
|
||||
cd your-awesome-project
|
||||
|
||||
# Start Claude Code
|
||||
claude
|
||||
|
||||
# Press Ctrl+C to exit Claude
|
||||
```
|
||||
|
||||
Next, we need to change Claude Code's default API request endpoint so it supports GLM's API service. You can copy the content below and ask Trae to create the corresponding environment variables for you. You can also choose to write them permanently into system environment variables (if issues occur, you can also ask Agent to help modify them).
|
||||
|
||||
First, you need to obtain your GLM API key and store it in whatever way is most convenient for you.
|
||||
|
||||
Domestic URL: https://bigmodel.cn/usercenter/proj-mgmt/apikeys
|
||||
International URL: https://z.ai/manage-apikey/apikey-list
|
||||
|
||||
If you are using the **domestic GLM** service, use the following variable configuration:
|
||||
|
||||
```shell
|
||||
# Run the following command in Cmd
|
||||
# Replace `your_zhipu_api_key` with the API key you just obtained
|
||||
setx ANTHROPIC_AUTH_TOKEN your_zhipu_api_key
|
||||
setx ANTHROPIC_BASE_URL https://open.bigmodel.cn/api/anthropic
|
||||
```
|
||||
|
||||
If you are using the **international GLM** service, use this configuration:
|
||||
|
||||
```shell
|
||||
# Run the following command in Cmd
|
||||
# Also replace `your_zai_api_key`
|
||||
setx ANTHROPIC_AUTH_TOKEN your_zai_api_key
|
||||
setx ANTHROPIC_BASE_URL https://api.z.ai/api/anthropic
|
||||
```
|
||||
|
||||
You can directly enter a prompt like this in Trae:
|
||||
|
||||
⚠️ If you configure "permanent environment variables" through Trae, then after configuration you **must restart Trae**. Otherwise environment variables in Trae's built-in terminal will not refresh, which may cause login failures or network connection errors.
|
||||
|
||||
```text
|
||||
Based on my environment variable settings:
|
||||
setx ANTHROPIC_AUTH_TOKEN your_zai_api_key
|
||||
setx ANTHROPIC_BASE_URL https://api.z.ai/api/anthropic
|
||||
|
||||
and my key (replace it with your own key):
|
||||
681fea485851d29060cc.13gfaendggaFOhb
|
||||
|
||||
please help me configure and start Claude Code
|
||||
```
|
||||
|
||||
You will see output similar to the following:
|
||||
|
||||

|
||||
|
||||
> 💡 What is an environment variable?
|
||||
>
|
||||
> Environment variables are essentially key-value configuration entries stored in the operating system, usually in the form "variable name = specific value." If configured in advance in terminal or system settings, programs can read these variables at any time to obtain relevant information. Because environment variables can be written directly in terminal without modifying code, we usually store large-model access keys in environment variables to avoid leakage. Programs only need to read corresponding environment variables to complete model invocation.
|
||||
>
|
||||
> In Windows, besides storing model access keys, environment variables are also commonly used to store executable "path locations" for command-line tools.
|
||||
>
|
||||
> We know the terminal itself is also a program. Sometimes we want to launch an external program from terminal. For example, typing `claude` in terminal to launch Claude Code. The reason this works is that terminal reads system environment variables, and the PATH variable contains the directory where Claude Code executable resides, so terminal can find and execute it (equivalent to pasting that program's absolute path into terminal and pressing Enter).
|
||||
>
|
||||
> A typical environment variable may look like this: `PATH=C:\Windows\system32;C:\Program Files\Python`. Then we can execute those programs from any directory, for example directly typing `python` in command line to start the Python interpreter.
|
||||
>
|
||||
> If you want to view current system environment variables, type "environment variables" in Windows Search, then in the "Edit the system environment variables" window you can see all variables and their values. Some store model keys, while others add program directories for invocation from any path.
|
||||
|
||||
Now you can use the latest GLM for Claude Code development. You can try rerunning previous projects, or retry tasks that Trae did not complete well, and compare the experience differences.
|
||||
|
||||
🎉 Rebuilding repeatedly is not a waste of time. Every repetition makes your skills more solid.
|
||||
|
||||
Using exactly the same logic as with GLM, you can also connect other interfaces that support Anthropic-compatible formats.
|
||||
|
||||
#### Use Kimi K2 as the Backend (Recommended)
|
||||
|
||||
Kimi K2 is a new-generation large language model released by Moonshot AI, with excellent performance in code understanding and generation. Kimi K2 supports ultra-long context windows (up to 200K tokens), and can easily handle large repositories and complex projects.
|
||||
|
||||
**Core advantages:**
|
||||
- **Ultra-long context**: Supports 200K context window, enabling one-pass handling of whole-project code
|
||||
- **Strong coding ability**: Performs very well in generation, refactoring, and debugging
|
||||
- **Better Chinese understanding**: More accurate understanding of Chinese programming requirements
|
||||
- **Stable tool invocation**: Supports reliable function-calling and tool usage
|
||||
|
||||
**Get API Key:**
|
||||
|
||||
Visit https://platform.moonshot.cn/console/account to register and obtain an API key.
|
||||
|
||||
**Configuration method:**
|
||||
|
||||
Reference docs: https://platform.moonshot.cn/docs/guide/agent-support
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_BASE_URL=https://api.moonshot.cn/anthropic
|
||||
export ANTHROPIC_AUTH_TOKEN=sk-YOURKEY
|
||||
```
|
||||
|
||||
#### Use Minimax as the Backend (Recommended)
|
||||
|
||||
Minimax is a new-generation large language model released by MiniMax, with excellent performance on programming tasks. Minimax models are known for strong reasoning and code-generation quality, especially suitable for complex programming scenarios.
|
||||
|
||||
**Core advantages:**
|
||||
- **Strong reasoning**: Performs well in complex logic reasoning and code architecture design
|
||||
- **High code quality**: Generated code is clear in structure and readable
|
||||
- **Multi-language support**: Supports code generation and conversion across multiple languages
|
||||
- **Fast response speed**: API responds quickly, suitable for high-frequency invocation scenarios
|
||||
|
||||
**Get API Key:**
|
||||
|
||||
Visit https://platform.minimax.io/ to register and obtain an API key.
|
||||
|
||||
**Configuration method:**
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_BASE_URL=https://api.minimax.io/anthropic
|
||||
export ANTHROPIC_AUTH_TOKEN=YOUR_MINIMAX_API_KEY
|
||||
export ANTHROPIC_MODEL=MiniMax-M2.7
|
||||
```
|
||||
|
||||
#### Use DeepSeek as the Backend (Recommended)
|
||||
|
||||
DeepSeek is an open-source large language model released by DeepSeek, popular among developers for strong coding capabilities and high cost-performance. DeepSeek Coder is specially optimized through training for programming tasks.
|
||||
|
||||
**Core advantages:**
|
||||
- **Outstanding coding capability**: Strong performance in code generation, understanding, and bug fixing
|
||||
- **Open-source and customizable**: Open-source model, can be fine-tuned based on needs
|
||||
- **High cost-performance**: Relatively low API pricing, suitable for high-frequency use
|
||||
- **Good Chinese support**: Accurate understanding of Chinese programming scenarios
|
||||
|
||||
**Get API Key:**
|
||||
|
||||
Visit https://platform.deepseek.com/usage to register and obtain an API key.
|
||||
|
||||
**Configuration method:**
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_BASE_URL=https://api.deepseek.com/anthropic
|
||||
export ANTHROPIC_AUTH_TOKEN=YOUR_DEEPSEEK_API_KEY
|
||||
export API_TIMEOUT_MS=600000
|
||||
export ANTHROPIC_MODEL=deepseek-chat
|
||||
export ANTHROPIC_SMALL_FAST_MODEL=deepseek-chat
|
||||
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1
|
||||
```
|
||||
|
||||
#### Use Volcano Engine Coding Plan as the Backend (Recommended)
|
||||
|
||||
Volcano Engine is ByteDance's cloud service platform, providing enterprise-level AI model services. Volcano Engine's Coding Plan is specially optimized for coding scenarios, offering stable and efficient code-generation capability.
|
||||
|
||||
**Core advantages:**
|
||||
- **Enterprise-grade stability**: Provides SLA guarantees for service stability
|
||||
- **Coding-scenario optimization**: Specifically optimized for programming tasks
|
||||
- **Rich model choices**: Supports multiple models including Doubao-pro and Doubao-lite
|
||||
- **Fast domestic access**: Domestic node deployment with faster access speed
|
||||
|
||||
**Get API Key:**
|
||||
|
||||
Visit https://console.volcengine.com/ark/region:ark+cn-beijing/apiKey to register and obtain an API key.
|
||||
|
||||
**Configuration method:**
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_BASE_URL=https://ark.volces.com/api/anthropic
|
||||
export ANTHROPIC_AUTH_TOKEN=YOUR_VOLCANO_API_KEY
|
||||
export ANTHROPIC_MODEL=doubao-pro-32k
|
||||
```
|
||||
|
||||
#### Other Anthropic-Compatible APIs
|
||||
|
||||
Siliconflow:
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_BASE_URL="https://api.siliconflow.cn/"
|
||||
export ANTHROPIC_MODEL="moonshotai/Kimi-K2-Instruct-0905" # You can change to the model you need
|
||||
export ANTHROPIC_API_KEY="YOUR_SILICONCLOUD_API_KEY" # Replace with your API key
|
||||
```
|
||||
|
||||
Aliyun DashScope (Aliyuncs): https://help.aliyun.com/zh/model-studio/get-api-key
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_BASE_URL="https://dashscope.aliyuncs.com/apps/anthropic"
|
||||
export ANTHROPIC_API_KEY="YOUR_DASHSCOPE_API_KEY"
|
||||
```
|
||||
|
||||
::: details Use Claude Code Router as the Backend (Advanced Usage)
|
||||
|
||||
Above we explained how to replace Claude Code's Anthropic interface with the official GLM API. Next, let's look at how Claude Code Router allows Claude Code to adapt to more model APIs.
|
||||
|
||||
[Claude Code Router](https://github.com/musistudio/claude-code-router) is an intelligent routing enhancement tool designed specifically for Claude Code. Its core function is helping users distribute AI requests to models across different platforms as needed, with a high degree of customization. It supports access to dozens of platforms including OpenRouter, DeepSeek, Ollama, Gemini, and more. It can also route tasks to specific models by scenario, such as GLM-4.5, Kimi-K2, and Qwen3-Coder. For example, you can route background tasks to local Ollama to save cost, route long text / long code tasks to Gemini-2.5-Pro, and route code explanation to DeepSeek.
|
||||
|
||||

|
||||
|
||||
This tool also provides convenient UI/CLI configuration management and uses converters to adapt API formats from different platforms. It supports automation integration such as GitHub Actions and custom extensions, solving the problems of "one single model cannot cover all scenarios" and "frequent platform switching is troublesome," helping users use AI tools more flexibly and at lower cost.
|
||||
|
||||

|
||||
|
||||
Below is a quick introduction to installing Claude Code Router. The rough steps to prepare the environment are as follows (you can also ask Trae to execute them):
|
||||
|
||||
```bash
|
||||
npm install -g @anthropic-ai/claude-code
|
||||
npm install -g @musistudio/claude-code-router
|
||||
```
|
||||
|
||||
After installation, you need to confirm the `ccr` command is available locally. If you see output similar to the following, installation is successful:
|
||||
|
||||

|
||||
|
||||
Next, there are two ways to initialize and configure models:
|
||||
|
||||
- Use CCR's built-in UI and configure on its browser page.
|
||||
- Directly edit CCR's default configuration file (the UI essentially edits the config file as well, just with a more intuitive interface).
|
||||
|
||||
If you choose CCR UI, you will see an interface similar to this:
|
||||
|
||||

|
||||
|
||||
At this point, click the "Add Provider" button to see the following interface. You need to:
|
||||
|
||||
1. Enter the provider name in Name;
|
||||
2. Fill in that provider's OpenAI-compatible endpoint in API Full URL;
|
||||
3. Fill in the corresponding platform API key in API Key;
|
||||
4. Fill in the model names in the Models area, then click "Add Model";
|
||||
5. Finally click "Save" to persist configuration.
|
||||
|
||||
(If you scroll downward there are many advanced options, but you can ignore them for now.)
|
||||
|
||||

|
||||
|
||||
Here are configuration examples for DeepSeek and Kimi:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
After saving model configuration, you also need to specify the default model in the Router area on the right. Select from the dropdown and set it to `kimi` (recommended), then click `Save and Restart` in the top-right corner.
|
||||
|
||||

|
||||
|
||||
After that, simply run `ccr code` in terminal to start Claude Code workflow through Claude Code Router.
|
||||
|
||||

|
||||
|
||||
:::
|
||||
|
||||
#### Advanced Usage of Claude Code
|
||||
|
||||
Many people initially use Claude Code only as a normal chat tool. But in fact it has many built-in capabilities that can make your workflow more efficient and flexible. Here are common commands and usage examples:
|
||||
|
||||
Reference docs:
|
||||
|
||||
https://docs.claude.com/en/docs/claude-code/cli-reference
|
||||
https://docs.claude.com/en/docs/claude-code/slash-commands
|
||||
|
||||
| Command | Purpose | Example |
|
||||
| ----------------- | ----------------------------------------- | ---------------------------------------- |
|
||||
| claude | Start interactive mode | `claude` |
|
||||
| claude "query" | Run one-off task and output result | `claude "explain this project"` |
|
||||
| claude -p "query" | Ask one-off question and auto-exit | `claude -p "explain this function xxxx"` |
|
||||
| claude -c | Continue most recent session | `claude -c` |
|
||||
| claude -r | Resume previous session | `claude -r` |
|
||||
| /resume | Switch to previous session in current chat | `claude -c`, `/resume` |
|
||||
| /plugin | Manage plugins and install submit/review extensions | `/plugin` |
|
||||
| /init | Initialize project description with CLAUDE.md | `/init` |
|
||||
| /clear | Clear current context to prevent overload | `/clear` |
|
||||
| /compact | Compress history and reduce context token usage | `/compact` |
|
||||
| /cost | View current cost usage | `/cost` |
|
||||
| /model | Switch model (usually ignorable with compatible APIs) | `/model` |
|
||||
| /memory | Manage CLAUDE.md memory file | |
|
||||
| /help | Show available command list | `/help` |
|
||||
| exit or Ctrl+C | Exit Claude Code | `exit` or `Ctrl+C` |
|
||||
| /agents | Advanced feature, explained later | |
|
||||
| /mcp | Advanced feature, explained later | |
|
||||
|
||||
**CLAUDE.md**
|
||||
|
||||
Reference: https://www.anthropic.com/engineering/claude-code-best-practices
|
||||
|
||||
`CLAUDE.md` is a special file that Claude automatically reads and includes in context at the beginning of a session. So it is very suitable for recording:
|
||||
|
||||
- Common bash commands
|
||||
- Core files and utility functions
|
||||
- Code style conventions
|
||||
- Testing method notes
|
||||
- Repository collaboration conventions (for example branch naming, merge vs rebase, etc.)
|
||||
- Development environment setup notes (for example whether to use pyenv, preferred compiler, etc.)
|
||||
- Behaviors or pitfalls that need extra attention in the project
|
||||
- Any information you want Claude to "remember"
|
||||
|
||||
`CLAUDE.md` itself has no strict format requirement, as long as it is concise and human-readable. For example:
|
||||
|
||||
```
|
||||
# Bash commands
|
||||
- npm run build: Build the project
|
||||
- npm run typecheck: Run the typechecker
|
||||
|
||||
# Code style
|
||||
- Use ES modules (import/export) syntax, not CommonJS (require)
|
||||
- Destructure imports when possible (eg. import { foo } from 'bar')
|
||||
|
||||
# Workflow
|
||||
- Be sure to typecheck when you’re done making a series of code changes
|
||||
- Prefer running single tests, and not the whole test suite, for performance
|
||||
```
|
||||
|
||||
#### Internal Principles of Claude Code
|
||||
|
||||
Reference: https://github.com/shareAI-lab/analysis_claude_code
|
||||
|
||||
If you are curious why Claude Code performs better than Trae or Cursor agent tools in many scenarios, we can briefly look at its internal working mechanism.
|
||||
|
||||
The overall implementation style of other CLI AI coding tools is broadly similar.
|
||||
|
||||

|
||||
|
||||
Claude Code decomposes coding tasks into a continuous "perceive - think - act - verify" loop and invokes different tools in the loop to complete work. It imitates human developer workflow: continuously "write code -> run -> inspect result -> improve again." Internally, a main task loop continuously executes steps. In each cycle, Claude can call different tools, such as reading/writing files, executing commands, and searching code, then decide next actions based on real tool outputs.
|
||||
|
||||
Several key characteristics are worth noting:
|
||||
|
||||
- **Stream Processing**: Claude can think while outputting results, instead of waiting to finish all code before execution.
|
||||
- **Intelligent Compression**: Long conversations can make context too large. Claude compresses history into key information to reduce "forgetting," and distinguishes long-term vs short-term memory to keep execution efficient.
|
||||
- **Concurrency Control**: Internal parallel design allows multiple tasks to proceed simultaneously without interference.
|
||||
- **Sub-agent Management**: In real work it is not just one single "role" handling everything. You can manage multiple sub-agents collaboratively, each responsible for different tasks, such as dedicated testing or documentation agents.
|
||||
|
||||
### Codex
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
Similar to Claude Code, Codex is an AI collaborative coding tool developed by OpenAI. You can think of it as the "OpenAI version of Claude Code." Its biggest advantage is efficient adaptation to GPT-5.
|
||||
|
||||
From practical experience, GPT-5 currently responds faster and makes fewer mistakes (higher success probability in complex multi-round tasks). One drawback is that explanations can feel more "academic" and technical, sometimes too rigorous and information-dense, which can be slightly harder for beginners.
|
||||
|
||||
You can install Codex with the following command:
|
||||
|
||||
```
|
||||
npm i -g @openai/codex
|
||||
```
|
||||
|
||||
#### Use Official OpenAI API as the Backend
|
||||
|
||||
If you directly use the official OpenAI entry for Codex, setup is very simple. Once you have OpenAI subscription access or corresponding API quota, you only need to run `codex` in command line and follow the prompts to complete login.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Use Relayed OpenAI API as the Backend
|
||||
|
||||
Because official OpenAI API can have issues such as high cost and strict network requirements, we can also avoid those restrictions by routing through other API gateway services.
|
||||
|
||||
With this approach, we only need to buy corresponding Codex API quota on a third-party relay platform, and we can get an experience close to native OpenAI Codex.
|
||||
|
||||
Reference: https://open-dev.feishu.cn/wiki/PAqUwWG4IiuwTvkQ2sGcaQuPnXc
|
||||
Recharge URL: https://api.zyai.online/account/topup/recharge
|
||||
|
||||
One thing to note: after obtaining token quota, we still need to configure the API key locally.
|
||||
|
||||
In key-group settings, make sure you choose the item specifically for Codex.
|
||||
|
||||

|
||||
|
||||
Next, we need to fill the key you obtained into the prompt below, then give the entire prompt to Trae so it can complete the whole configuration process for you:
|
||||
|
||||
````text
|
||||
My API key is: [Paste your obtained sk-xxxxx key here]
|
||||
|
||||
Please help me complete the following configuration tasks:
|
||||
|
||||
1. Create configuration directory
|
||||
- Create a `.codex` folder under my user directory
|
||||
- Windows path should be: `C:\Users\[My Username]\.codex`
|
||||
2. Backup existing configuration (if exists)
|
||||
- Check if `.codex\config.toml` exists
|
||||
- If it exists, rename it to `config.toml.bak.[current timestamp]` (timestamp format: yyyyMMddHHmmss)
|
||||
3. Create configuration file
|
||||
- Create `config.toml` in the `.codex` directory
|
||||
- Write the following complete content:
|
||||
```toml
|
||||
preferred_auth_method = "apikey"
|
||||
|
||||
[model_providers.myrelay]
|
||||
name = "My Relay Station"
|
||||
base_url = "https://api.zyai.online/v1"
|
||||
env_key = "MYRELAY_API_KEY"
|
||||
wire_api = "responses"
|
||||
request_max_retries = 4
|
||||
stream_max_retries = 10
|
||||
stream_idle_timeout_ms = 300000
|
||||
|
||||
[profiles.myrelay]
|
||||
model_provider = "myrelay"
|
||||
model = "gpt-5"
|
||||
model_reasoning_effort = "medium"
|
||||
|
||||
[tools]
|
||||
web_search = true
|
||||
```
|
||||
|
||||
4. Set system environment variable
|
||||
Variable name: MYRELAY_API_KEY
|
||||
Variable value: The key I gave you
|
||||
|
||||
5. Confirm completion and report back:
|
||||
|
||||
The full path of the configuration file
|
||||
Whether the environment variable was set successfully
|
||||
I can use the command `codex --profile myrelay` to run it
|
||||
````
|
||||
|
||||
After configuration, you can launch Codex with relayed API through `codex --profile myrelay`. Usage afterward is similar to Claude Code: just keep entering your ideas and requirements in chat at any time.
|
||||
|
||||
## More Use Cases for CLI AI Coding Tools
|
||||
|
||||
### Use AI to Write Requirement Documents: Learn to "Concretize Requirements"
|
||||
|
||||
For large language models, abstract requirements should be "concretized." For example, "I'm hungry" is an abstract requirement. We need to convert it into: "My stomach is a little hungry. I may need a red-bean bun and a cup of soy milk." That becomes an executable, concrete requirement.
|
||||
|
||||
But turning abstract requirements into concrete ones is actually a process that costs a lot of energy. If we have not seen enough examples, it is hard to quickly think of how to break abstract problems into detailed modules. In such cases, the best approach is to let AI help with this "concretization" step.
|
||||
For example, if I want to build a daily planner app, the most basic idea might be:
|
||||
|
||||
`Please help me write a daily planning app where I can write my plans each day and get reminders.`
|
||||
|
||||
AI can indeed split tasks directly from this requirement and complete them step by step, but errors or understanding drift can occur in the middle. To reduce this risk, we can ask AI to expand the requirement first:
|
||||
|
||||
`Based on my needs, please elaborate and provide a more detailed Product Requirement Document for reference. My idea is: Please help me write a daily planner app that supports daily plan-writing and provides reminders.`
|
||||
|
||||
At this point, AI may produce a complete PRD like the following:
|
||||
|
||||
```
|
||||
Product Requirements Document (PRD): “Today’s Plan” App
|
||||
Document Version: 1.0
|
||||
Creation Date: October 27, 2023
|
||||
Author: (Your Name/Product Manager)
|
||||
|
||||
1. Product Introduction
|
||||
1.1 Product Name
|
||||
Today's Plan
|
||||
1.2 Product Vision
|
||||
“Today’s Plan” is a minimalist and highly efficient daily planning and reminder tool, dedicated to helping users eliminate procrastination and forgetfulness, plan every day clearly, and ensure tasks are carried out through an intelligent reminder system—ultimately enabling users to gain a stronger sense of control and achievement over their time.
|
||||
1.3 Target Users (User Personas)
|
||||
We mainly serve three types of users:
|
||||
Students (Xiao Ming):
|
||||
Characteristics: Multiple tasks such as courses, assignments, club activities, exam prep, needing organized time arrangement.
|
||||
Pain Points: Easily forget small tasks or assignment deadlines; feel overwhelmed switching between tasks; want to build regular study and life habits.
|
||||
Needs: A simple tool to list daily to-dos and provide reminders before class/self-study.
|
||||
Office Workers (Zhang Wei):
|
||||
Characteristics: Fast-paced work, many meetings, reports, project milestones, and personal affairs (fitness, picking up children).
|
||||
Pain Points: Easily forget important meetings or work milestones; get interrupted by urgent tasks and forget the original plan; feel busy but inefficient at end of day.
|
||||
Needs: Need a tool to quickly record and schedule daily work and send strong reminders at key times (e.g., 15 minutes before meetings).
|
||||
Freelancers/Self-disciplined Seekers (Li Na):
|
||||
Characteristics: High freedom of time, but strong self-management required for work output and personal growth.
|
||||
Pain Points: Easily procrastinate, lack external supervision; start the day without a clear plan, leading to low time utilization.
|
||||
Needs: Need a tool to help build a daily fixed routine (Morning Routine) and review daily achievements for positive feedback.
|
||||
|
||||
2. User Stories
|
||||
As a user, I want to quickly create today’s plan list so I have an overview of all my tasks for the day.
|
||||
As a user, I want to set specific start and end times for each task so I can create a visual timeline.
|
||||
As a user, I want to receive push notification reminders before a task starts so I won’t miss any important arrangements.
|
||||
As a user, I want to customize the reminder time (such as 5, 15, or 60 minutes in advance) so reminders better fit my habits.
|
||||
As a user, I want to easily mark completed tasks so I can feel accomplished and clearly see my progress.
|
||||
As a user, I want to see a summary of my completed plans at the end of each day for reviewing and self-motivation.
|
||||
As a user, I want to conveniently edit and delete tasks to handle last-minute changes.
|
||||
As a user, I want to view plans and achievements from previous days to review my efficiency and habits.
|
||||
|
||||
3. Feature Breakdown
|
||||
Core Features (MVP - Minimum Viable Product)
|
||||
Module 1: Plan Management
|
||||
3.1.1 Daily Plan Homepage
|
||||
Interface: “Today” as the core view, current date shown at the top.
|
||||
View: Timeline list, clearly showing tasks scheduled from morning to evening. Tasks without a time can be listed in the top or bottom “To-do List” section.
|
||||
Interactions:
|
||||
Click the “+” button in the bottom right to quickly create a new task.
|
||||
Pull down to refresh the page.
|
||||
Swipe left/right to view yesterday’s and tomorrow’s plans.
|
||||
3.1.2 Create/Edit Task
|
||||
Entry: Click “+” on the homepage or a time slot in the list.
|
||||
Fields:
|
||||
Task title (required): Briefly describe the task, e.g., “10 AM Weekly Product Meeting.”
|
||||
Task time (optional):
|
||||
Set “start time” and “end time.”
|
||||
Provide “all-day” option for unspecified time tasks.
|
||||
Default time picker should be quick and convenient.
|
||||
Reminder setting (required, with default value): See Module 2.
|
||||
Notes (optional): Add further descriptions, links, or location info.
|
||||
Actions: Save, cancel, delete task.
|
||||
3.1.3 Task Interaction
|
||||
Mark as complete: Checkbox before each task; checking adds a strikethrough and gray background, indicating completion. Can unmark if needed.
|
||||
Edit task: Click the task itself to enter edit page.
|
||||
Delete task: Swipe left on a task to reveal “Delete” button.
|
||||
Module 2: Smart Reminder System
|
||||
3.2.1 Reminder Trigger
|
||||
Mechanism: Based on task’s set “start time” and the user’s “reminder lead time,” send a push notification from device.
|
||||
Offline Support: Locally scheduled reminders must trigger even if user is offline.
|
||||
3.2.2 Reminder Content & Format
|
||||
Notification title: App name “Today’s Plan.”
|
||||
Body: “Reminder: [Task Title] will start at [Start Time].” E.g., “Reminder: Product Meeting will start at 10:00.”
|
||||
Sound: Use system default or offer several simple, effective tones.
|
||||
3.2.3 Reminder Settings
|
||||
Global Settings (in Settings page):
|
||||
User can set a default reminder time, e.g., “15 minutes before task starts.” New tasks adopt this by default.
|
||||
Single Task Settings (in create/edit page):
|
||||
Users can override global settings for important tasks, choosing specific reminder times like "on time," "5 minutes early," "30 minutes early," or "1 hour early."
|
||||
Provide “no reminder” option.
|
||||
Subsequent Features (V1.1, V2.0)
|
||||
3.3 Daily Review & Statistics
|
||||
Push a summary notification at a set time every night (e.g., 22:00): “How was your day? Take a look at your achievements!”
|
||||
Generate a simple daily report card: shows total planned tasks, completed tasks, completion rate, plus an encouraging message.
|
||||
3.4 History Review
|
||||
Calendar view to click on any past day and check its plans and completion status. Days with high completion rates marked with a special color.
|
||||
3.5 Templates
|
||||
Allow users to save a successful daily plan as a template, e.g., “Efficient Workday,” “Relaxing Weekend.”
|
||||
When creating tomorrow’s plan, one-click import a template, modify slightly to save time.
|
||||
3.6 Themes & Personalization
|
||||
Offer dark mode.
|
||||
Allow changing several primary color themes.
|
||||
|
||||
4. Non-Functional Requirements
|
||||
4.1 Performance
|
||||
Response: App launch time under 2 seconds; adding/editing tasks must be smooth and lag-free.
|
||||
Resource Use: Low battery and memory consumption in background; do not over-consume resources waiting for reminders.
|
||||
4.2 Usability
|
||||
Minimal & intuitive: UI must be minimal, primary functions accessible within 3 clicks. No tutorial needed for new users.
|
||||
Error tolerance: Offer undo (e.g. brief undo after mistakenly deleting a task).
|
||||
4.3 Reliability
|
||||
Reliable reminders: Reminder function is the product’s lifeline; must guarantee 99.99% timely and accurate delivery.
|
||||
Data loss-free: User plans must be reliably stored locally. Future versions can support cloud sync to prevent data loss on device change.
|
||||
4.4 Compatibility
|
||||
Platform: Support major iOS and Android versions (latest 3-4 releases).
|
||||
Screen: Layout must fit various phone screen sizes.
|
||||
|
||||
5. Roadmap
|
||||
V1.0 (MVP):
|
||||
Goal: Validate core value—planning & reminders.
|
||||
Features: Complete all “Core Features” described above (Plan management, smart reminders).
|
||||
V1.1 (Quick Optimization):
|
||||
Goal: Improve retention and achievement.
|
||||
Features: Add “Daily Review & Statistics,” “History Review.”
|
||||
V2.0 (Enhanced Experience):
|
||||
Goal: Increase efficiency and personalization.
|
||||
Features: Add “Templates,” “Themes & Personalization,” and start developing “Cloud Sync.”
|
||||
```
|
||||
|
||||
Compared with our initial sentence "help me write an app where I can record plans and get reminders every day," this document is now far more detailed. You can add, remove, and revise content based on real needs. For modules you are unsure about, you can keep asking AI for more alternatives, then select and merge them into a final version.
|
||||
|
||||
In this way, we can easily turn abstract ideas into concrete descriptions. For AI development, "concrete" means productivity. The more concrete the requirement is, the easier it is to get stable structure and higher-quality project output. You can try redoing one of your previous small projects in this way and compare the difference.
|
||||
|
||||
If you feel this kind of "requirement prompt" is too long, a very natural approach is to write it into a standalone Markdown document as your requirement document / development document / PRD. Then each time you ask AI to build a project, you only need to ask it to "refer to this document" instead of retyping long prompts every time. You can also continuously improve this document across iterations so future projects benefit directly.
|
||||
|
||||
Below are some other common use cases:
|
||||
|
||||
### Manage Folders
|
||||
|
||||
We can try using CLI AI coding tools to manage various files in the current folder. For example, if you have a pile of messy files that need sorting and grouping, you can tell Claude Code or Codex:
|
||||
|
||||
`Please help me organize the contents of the current folder. I want to group files with the same content together, and I want to group files from the same time period together. Please help me handle this.`
|
||||
|
||||
### Develop New Projects
|
||||
|
||||
This is almost exactly the same as how we previously used z.ai and Trae. We can directly use CLI AI coding tools to develop brand-new projects from scratch. Of course, it is best to prepare a requirement document in advance.
|
||||
|
||||
The more detailed the requirement document, the better the final result. You can optimize that document across multiple rounds as your ideas evolve. The more complete the document, the more stable and mature the implementation usually becomes.
|
||||
|
||||
### Deploy Open-Source Projects (for example Dify)
|
||||
|
||||
For learners who are new to computers, deploying an open-source project from GitHub is often difficult. But we can fully hand this over to Claude Code, just as we did in the Dify tutorial:
|
||||
|
||||
https://github.com/langgenius/dify
|
||||
|
||||
If I want to run my own local Dify, I only need to give this link to Claude Code and type:
|
||||
|
||||
`I want to deploy this GitHub project: https://github.com/langgenius/dify. Please help me clone the project and run it.`
|
||||
|
||||
After receiving your request, Claude Code will automatically complete a series of operations, including pulling code from GitHub, configuring runtime environments, and starting the project. If any step fails or startup status is abnormal, you only need minor manual handling based on prompts. Beyond Dify, you can also ask Claude Code to deploy most common open-source GitHub projects for you. You just need one chat box and the time to drink a cup of coffee ☕️.
|
||||
|
||||

|
||||
|
||||
### Explain Code and Write Documentation
|
||||
|
||||
For some complex projects, or large projects generated by AI, you may feel the code is too long and the logic too dense to understand. In that case, you can ask CLI AI coding tools to "read the code" for you. For example, you can ask:
|
||||
|
||||
- Please explain this project to me: how to run it, how to use it, and how to modify and continue developing it later.
|
||||
- Please explain the overall workflow of this project: how does the program run, and what actions can users perform in the interface?
|
||||
- Please write complete documentation for this project, including development docs and run docs.
|
||||
- Based on everything in my current folder, write a detailed explanation and save it into a specified Markdown document.
|
||||
|
||||
### More Use Cases
|
||||
|
||||
Of course, CLI AI coding tools can do far more than what we listed above. Do not treat them only as "code-writing tools." Treat them as intelligent agents with independent action capabilities. You can ask them to:
|
||||
|
||||
- Manage and organize local files;
|
||||
- Write journals and summaries;
|
||||
- Analyze and fix system errors;
|
||||
- Execute various repetitive command-line tasks.
|
||||
|
||||
In the near future, it may become your most important and most understanding AI companion on your computer.
|
||||
@@ -0,0 +1,3 @@
|
||||
# How to Integrate Stripe and Other Billing Systems
|
||||
|
||||
> This chapter is currently being written. Stay tuned...
|
||||
@@ -0,0 +1,951 @@
|
||||
<script setup>
|
||||
import { relatedArticlesMap } from '@theme/data/relatedArticles'
|
||||
|
||||
const relatedArticles = relatedArticlesMap['en/stage-2/frontend/2.0-lovart-assets'] ?? []
|
||||
</script>
|
||||
|
||||
# Starting from NanoBanana: Build Your Own Asset Production Agent
|
||||
|
||||
## Chapter 1: Generate Your First Image Asset in 1 Minute
|
||||
|
||||
Before we discuss design, style, or prompt engineering, let's generate the first image with the fewest possible steps.
|
||||
|
||||
### 1.1 Meet NanoBanana
|
||||
|
||||
Before discussing design style and prompt engineering, let's take care of something more important first: **confirm that you can actually generate an image.**
|
||||
|
||||
Mainstream large models now already support image generation and editing. These are usually called **generative models**.
|
||||
|
||||
To keep the process as simple as possible, this tutorial uses a model with stable image generation and editing capabilities as the example: NanoBanana. It is an image generation model from Google. Its formal name is **Gemini 2.5 Flash Image** (the model ID `gemini-2.5-flash-image` used in the code below). It supports direct image generation from natural language, and also supports editing based on existing images.
|
||||
|
||||

|
||||
|
||||
In terms of core capability, it is not fundamentally different from other models you may have heard of (such as GPT-4o, Claude, Qwen, Midjourney, and others): **you provide the description, and the model generates the result.**
|
||||
|
||||

|
||||
|
||||
You can think of it as a "brush." In this chapter we care about only one thing:
|
||||
👉 **can this brush draw its first stroke in your hands?**
|
||||
|
||||
In practical usage, NanoBanana can be used directly through official platforms like **Google AI Studio**, and it can also be integrated into development workflows via **API**. This tutorial uses the API approach. A NanoBanana 2 model is also available now, and you can try the latest model as well.
|
||||
|
||||
### 1.2 A "Hello World" Level Generation
|
||||
|
||||
Before we start, you only need to complete these three steps:
|
||||
|
||||
1. Create a new folder in Trae
|
||||
|
||||

|
||||
|
||||
2. Create a new Python file
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
3. Paste the full code below
|
||||
|
||||
Trae will automatically complete environment setup and dependency installation. No extra configuration is needed.
|
||||
|
||||
The code uses a NanoBanana API Key. We will not expand on the application process here. As long as you can obtain the key and fill in the corresponding parameter, that is enough. **At this stage, you do not need to understand every line of code. It only needs to run successfully.**
|
||||
|
||||
```Python
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "gradio>=4.0.0",
|
||||
# "pillow>=10.0.0",
|
||||
# "requests>=2.31.0",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import gradio as gr
|
||||
import requests
|
||||
import base64
|
||||
from PIL import Image
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
# 配置 API 信息
|
||||
NANOBANANA_API_URL: str = "YOUR API URL"
|
||||
NANOBANANA_API_KEY: str = "YOUR API KEY"
|
||||
OUTPUT_DIR: str = "outputs"
|
||||
|
||||
# 确保输出目录存在
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
def image_to_base64_data_uri(image: Image.Image) -> str:
|
||||
"""
|
||||
将 PIL 图像转换为 OpenAI API 兼容的 data URI 格式。
|
||||
"""
|
||||
buffer = io.BytesIO()
|
||||
# 统一转为 PNG 以保证兼容性
|
||||
image.save(buffer, format="PNG")
|
||||
encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
|
||||
return f"data:image/png;base64,{encoded}"
|
||||
|
||||
def base64_to_image(base64_str: str) -> Optional[Image.Image]:
|
||||
"""
|
||||
将纯 base64 字符串转换为 PIL Image。
|
||||
"""
|
||||
try:
|
||||
image_bytes = base64.b64decode(base64_str)
|
||||
return Image.open(io.BytesIO(image_bytes))
|
||||
except Exception as e:
|
||||
print(f"Base64 解码失败: {e}")
|
||||
return None
|
||||
|
||||
def extract_base64_from_response(content: Any) -> Optional[str]:
|
||||
"""
|
||||
核心解析逻辑:从 API 返回的 content 中提取图片 Base64 数据。
|
||||
兼容 Markdown 格式和结构化列表格式。
|
||||
"""
|
||||
if not content:
|
||||
return None
|
||||
|
||||
base64_data = None
|
||||
|
||||
# 1. 尝试结构化提取 (List)
|
||||
# 对应返回格式: [{"type": "image_url", "image_url": {"url": "data:..."}}]
|
||||
if isinstance(content, list):
|
||||
for part in reversed(content): # 倒序查找,通常最新的图片在最后
|
||||
if isinstance(part, dict):
|
||||
# 检查 image_url 或 output_image 字段
|
||||
img_field = part.get("image_url") or part.get("image") or part.get("output_image")
|
||||
if isinstance(img_field, dict):
|
||||
url = img_field.get("url", "")
|
||||
if url.startswith("data:image/") and "," in url:
|
||||
return url.split(",", 1)[1].strip()
|
||||
|
||||
# 如果列表中没有结构化图片,尝试把列表里的文本拼起来找 Markdown
|
||||
text_parts = [
|
||||
str(p.get("text", ""))
|
||||
for p in content
|
||||
if isinstance(p, dict) and p.get("type") in ["text", "input_text"]
|
||||
]
|
||||
content_str = "".join(text_parts)
|
||||
else:
|
||||
content_str = str(content)
|
||||
|
||||
# 2. 尝试 Markdown 正则提取 (String)
|
||||
# 对应返回格式: "Here is your image: "
|
||||
pattern = re.compile(r"!\[.*?\]\((data:image/[^;]+;base64,[^)]+)\)", re.IGNORECASE)
|
||||
match = pattern.search(content_str)
|
||||
|
||||
if match:
|
||||
data_url = match.group(1)
|
||||
if "," in data_url:
|
||||
return data_url.split(",", 1)[1].strip()
|
||||
|
||||
return None
|
||||
|
||||
def synthesize(prompt: str, input_image: Optional[Image.Image]) -> Optional[Image.Image]:
|
||||
"""
|
||||
调用 Nanobanana API 进行生成。
|
||||
"""
|
||||
if not prompt or not prompt.strip():
|
||||
gr.Warning("请输入提示词")
|
||||
return None
|
||||
|
||||
print(f">>> 开始任务: {prompt[:50]}...")
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {NANOBANANA_API_KEY}"
|
||||
}
|
||||
|
||||
# 构造符合 OpenAI Vision / Chat 标准的 payload
|
||||
messages = []
|
||||
|
||||
if input_image is not None:
|
||||
# 图生图/多模态输入模式
|
||||
print(">>> 检测到输入图片,使用多模态模式")
|
||||
img_base64 = image_to_base64_data_uri(input_image)
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{"type": "image_url", "image_url": {"url": img_base64}}
|
||||
]
|
||||
})
|
||||
else:
|
||||
# 纯文生图模式
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": prompt
|
||||
})
|
||||
|
||||
payload = {
|
||||
"messages": messages,
|
||||
# 使用第一段代码中验证可用的模型
|
||||
"model": "gemini-2.5-flash-image",
|
||||
# 可选参数,视 API 支持情况而定
|
||||
"stream": False
|
||||
}
|
||||
|
||||
try:
|
||||
# 增加超时时间,图片生成通常较慢
|
||||
response = requests.post(NANOBANANA_API_URL, headers=headers, json=payload, timeout=120)
|
||||
|
||||
# 检查 HTTP 状态
|
||||
if response.status_code != 200:
|
||||
error_msg = f"API 请求失败: {response.status_code} - {response.text}"
|
||||
print(error_msg)
|
||||
gr.Error(error_msg)
|
||||
return None
|
||||
|
||||
result = response.json()
|
||||
# Debug: 打印返回结果的前一部分,方便调试
|
||||
print(f"API 原始响应 (截取): {str(result)[:200]}...")
|
||||
|
||||
# 提取 Content
|
||||
content = None
|
||||
if "choices" in result and len(result["choices"]) > 0:
|
||||
content = result["choices"][0].get("message", {}).get("content")
|
||||
|
||||
if not content:
|
||||
gr.Warning("API 返回结果中没有 content 字段")
|
||||
return None
|
||||
|
||||
# 使用之前验证过的逻辑提取 Base64
|
||||
base64_str = extract_base64_from_response(content)
|
||||
|
||||
if base64_str:
|
||||
output_image = base64_to_image(base64_str)
|
||||
if output_image:
|
||||
return output_image
|
||||
|
||||
# 如果没提取到图片,可能是模型拒绝了或只返回了文本
|
||||
text_content = str(content) if not isinstance(content, list) else " ".join([str(x) for x in content])
|
||||
gr.Info(f"未生成图片,模型返回文本: {text_content[:100]}...")
|
||||
return None
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
gr.Error("请求超时,请稍后重试")
|
||||
return None
|
||||
except Exception as e:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
gr.Error(f"发生未知错误: {str(e)}")
|
||||
return None
|
||||
|
||||
# Gradio 界面配置
|
||||
with gr.Blocks(title="Nanobanana Image Generator") as app:
|
||||
gr.Markdown("# 🍌 Nanobanana Text/Image to Image")
|
||||
gr.Markdown("基于 Gemini-2.5-Flash-Image 模型,支持文生图与图生图。")
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
prompt_input = gr.Textbox(
|
||||
label="提示词 (Prompt)",
|
||||
placeholder="例如: A cyberpunk cat holding a neon sign...",
|
||||
lines=3
|
||||
)
|
||||
image_input = gr.Image(
|
||||
label="参考图 (可选,用于图生图)",
|
||||
type="pil",
|
||||
height=300
|
||||
)
|
||||
submit_btn = gr.Button("开始生成", variant="primary")
|
||||
|
||||
with gr.Column():
|
||||
image_output = gr.Image(label="生成结果", format="png")
|
||||
|
||||
submit_btn.click(
|
||||
fn=synthesize,
|
||||
inputs=[prompt_input, image_input],
|
||||
outputs=image_output
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.launch(share=True)
|
||||
```
|
||||
|
||||
When Trae indicates successful execution, click the local link it provides (usually `http://127.0.0.1:7860`).
|
||||
|
||||

|
||||
|
||||
If everything is correct, you will see a working AI drawing interface.
|
||||
|
||||
This interface looks simple, but it already includes two of the most important capabilities in commercial-grade drawing tools: text-to-image and image-to-image.
|
||||
|
||||
* **Left side: instruction area (Input Zone)** - this is where you issue commands.
|
||||
* **Prompt (prompt box):** Enter your creative description (English is recommended).
|
||||
* **Input Image (reference image box):**
|
||||
* **Text-to-image mode:** keep it **empty**.
|
||||
* **Image-to-image mode:** drag a local image here, and AI will create based on it.
|
||||
* **Submit button:** click to send instructions and start generation.
|
||||
* **Right side: display area (Output Zone)** - this is where results appear.
|
||||
|
||||

|
||||
|
||||
Now we can try generating your first image.
|
||||
|
||||
The example prompt used here is:
|
||||
|
||||
> **A red apple**
|
||||
|
||||
This is intentionally simplified, without style details or parameter constraints.
|
||||
|
||||
#### Actual Process
|
||||
|
||||
After running the code, the flow can be summarized in three steps:
|
||||
|
||||
1. Send the text description to the model
|
||||
2. The model generates the corresponding image
|
||||
3. The image is shown in the output area, where you can download it as a local file
|
||||
|
||||
After a few seconds, you will see the generated result in the output area. Because model generation is stochastic, the same prompt can produce different outputs. You can generate multiple times and choose the image you prefer.
|
||||
|
||||

|
||||
|
||||
You can also enrich your prompt with more constraints and descriptions. For example, the prompt below tends to generate a more distinctive result:
|
||||
|
||||
```Plain
|
||||
"A hyper-realistic close-up of a fresh red apple with water droplets on its skin, sitting on a dark rustic wooden table. Cinematic dramatic lighting, rim light, shallow depth of field, bokeh background, 8k resolution, macro photography."
|
||||
(一个超写实的带水珠的新鲜红苹果特写,放在深色粗糙木桌上。电影级戏剧光效,轮廓光,浅景深,背景虚化,8k分辨率,微距摄影。)
|
||||
```
|
||||
|
||||

|
||||
|
||||
Click download in the Output Image area to save the image locally.
|
||||
|
||||

|
||||
|
||||
### 1.3 Common Asset-Generation Scenarios for Image Models
|
||||
|
||||
In real work, large-model image generation is more often used for **efficiently producing design assets**, rather than creating one-off art pieces.
|
||||
|
||||
If you look at high-engagement cases from design marketing accounts, you will find that most outputs are concentrated in two scenarios:
|
||||
|
||||
* **Text-to-image (0 to 1)**
|
||||
* **Reference-image generation (1 to N)**
|
||||
|
||||
#### 1) Text-to-Image: Quickly Get Design Assets
|
||||
|
||||
This category is about efficiency. When you need to fill visual blanks in design (such as empty states, avatars, and illustrations), AI essentially acts as an **instant stock-image library**.
|
||||
|
||||
1. ##### Generate UI Design Assets
|
||||
|
||||
* Trend: frosted-glass and clay-style 3D icons, common on Dribbble
|
||||
* Typical appearance: translucent materials, glowing edges, candy-like color palettes for functional or weather icons
|
||||
|
||||
**Example Prompt:**
|
||||
|
||||
> A set of 3D weather icons (sun, cloud, rain), glassmorphism style, frosted glass texture, soft pastel gradient colors, soft studio lighting, isometric view, transparent background, 4k.
|
||||
|
||||
(一套 3D 天气图标,毛玻璃风格,磨砂质感,柔和渐变色,影棚光,等轴视图)
|
||||
|
||||

|
||||
|
||||
2. ##### Generate Logos
|
||||
|
||||
* Trend: minimalist lines and geometric combinations with a tech feel
|
||||
* Typical appearance: black-and-white color schemes, negative space, clear brand identity
|
||||
|
||||
**Example Prompt:**
|
||||
|
||||
> Minimalist vector logo design for a tech brand "Coffee Code", combining a coffee cup with coding brackets < >, flat design, solid black lines, white background, Paul Rand style, svg.
|
||||
|
||||
(极简矢量 Logo,结合咖啡杯与代码符号,扁平设计,纯黑线条)
|
||||
|
||||

|
||||
|
||||
3. ##### Generate Website User Avatars
|
||||
|
||||
* Trend: SaaS websites often use 3D virtual avatars to avoid real-person copyright risk
|
||||
* Typical appearance: friendly expressions, cartoon proportions, Pixar- or Memoji-like styles
|
||||
|
||||
**Example Prompt:**
|
||||
|
||||
> Close-up portrait of a friendly young tech professional, smiling, Memoji 3D style, clay render, bright colors, soft lighting, solid plain background, Pixar character design.
|
||||
|
||||
(友好的年轻科技从业者,3D Memoji 风格,黏土渲染)
|
||||
|
||||

|
||||
|
||||
4. ##### Generate Article Illustrations
|
||||
|
||||
* Trend: abstract flat illustrations commonly used in tech-company blogs
|
||||
* Typical appearance: purple-blue palettes, exaggerated character proportions, floating UI elements
|
||||
|
||||
**Example Prompt:**
|
||||
|
||||
> Editorial flat illustration representing remote work, a person sitting on a giant globe using a laptop, corporate memphis art style, vibrant colors (purple and teal), vector texture.
|
||||
|
||||
(远程办公主题扁平插画,企业孟菲斯风格)
|
||||
|
||||

|
||||
|
||||
#### 2) Reference-Image Generation: Keep Visual Consistency
|
||||
|
||||
This category focuses more on **scalability**. Use it when you already have a satisfactory key visual and need to generate a full set of assets in the same style.
|
||||
|
||||
5. ##### Generate a Similar Set of Buttons or Interaction Assets from a Key Visual
|
||||
|
||||
In game development, UI consistency is very important. Suppose you already have a main-screen **"PLAY"** button and now need to expand a full set of function buttons in a unified style (such as pause, settings, home). With pure manual drawing, it is hard to keep gloss, perspective, and color values fully consistent across every button.
|
||||
|
||||
**Basic workflow:**
|
||||
|
||||
1. Save the existing blue "PLAY" button image
|
||||
|
||||

|
||||
|
||||
2. Drag it into the **Input Image** area as the master reference
|
||||
3. Keep style descriptions in the prompt unchanged and only modify the subject content
|
||||
|
||||
With this flow, you can get buttons for different functions in the same style by changing only the subject description.
|
||||
|
||||
**Example Prompt:**
|
||||
|
||||
**Variant A: Pause Button (icon type)**
|
||||
|
||||
> A capsule-shaped game UI button with a white pause icon (two vertical bars) inside. Same glossy blue jelly style, shiny plastic texture, white thick outline, vector illustration, high quality.
|
||||
|
||||
(胶囊形游戏 UI 按钮,白色暂停图标,蓝色果冻质感)
|
||||
|
||||

|
||||
|
||||
**Variant B: Settings Button (complex icon)**
|
||||
|
||||
> A capsule-shaped game UI button with a white gear icon (settings symbol) inside. Same glossy blue jelly style, shiny plastic texture, white thick outline, vector illustration, high quality.
|
||||
|
||||
(胶囊形游戏 UI 按钮,白色齿轮图标,蓝色果冻质感)
|
||||
|
||||

|
||||
|
||||
**Variant C: Replay Button (shape variation)**
|
||||
|
||||
If you need to change the button shape, describe that shape directly in the prompt. The model will try to change the structure while keeping material characteristics.
|
||||
|
||||
> A round game UI button with a white circular arrow icon (replay symbol) inside. Same glossy blue jelly style, shiny plastic texture, white thick outline, vector illustration, high quality.
|
||||
|
||||
(圆形游戏 UI 按钮,循环箭头图标,蓝色果冻质感)
|
||||
|
||||

|
||||
|
||||
With this set of operations, you can not only change button function and icon, but also button shape, while keeping high consistency in material, color, and lighting. This is exactly the core value of large models in design-asset scaling scenarios.
|
||||
|
||||
## Chapter 2: A More Controllable Image Generation Assistant - Lovart as an Example
|
||||
|
||||
In the first part, we directly called NanoBanana with code and experienced the basic "input -> generate" flow. This works when requirements are simple. But as tasks include more constraints, for example:
|
||||
|
||||
* multiple images with consistent style
|
||||
* repeated iteration on existing results
|
||||
* dynamically adjusting generation direction based on user input
|
||||
|
||||
the one-shot calling pattern gradually becomes insufficient.
|
||||
|
||||
At this point, we need to introduce an **AI Agent**. This section uses **Lovart** as an example to show how the overall workflow changes when image generation gains a "thinking layer." Note: this is not an advertisement. It is only to help everyone quickly grasp the convenience of AI Agents.
|
||||
|
||||
### 2.0 First Look at Lovart: Your AI Design Agent
|
||||
|
||||
Lovart is an agent-based web design tool. Compared with ordinary image generation tools, it adds one extra layer of "thinking and planning" before generation.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
After entering Lovart, you mainly need to understand the following controls:
|
||||
|
||||
#### Model Selection
|
||||
|
||||
Click the cube icon below the input box to view the currently available generation models (such as GPT Image and Flux).
|
||||
|
||||
To stay consistent with earlier examples, this section still uses NanoBanana as the underlying generation model.
|
||||
|
||||

|
||||
|
||||
#### Thinking Mode
|
||||
|
||||
This is Lovart's core switch:
|
||||
|
||||
* **Fast Mode (⚡):** close to native API behavior, fast response, suitable for single images with clear instructions
|
||||
* **Thinking Mode (💡):** agent mode, where AI first decomposes requirements and rewrites prompts, then generates
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Internet Capability
|
||||
|
||||
After enabling the globe icon, the agent can retrieve online information during generation (for example design trends and color styles) as auxiliary input.
|
||||
|
||||
### 2.1 Why Is Native API Still Not Enough?
|
||||
|
||||
Even if you can already generate good images via Python, native APIs still have limitations in complex tasks. The key reason is that native APIs are fundamentally imperative. If you ask for a concrete object, they can execute directly. But when the input becomes "plan a complete set of game assets," they will not proactively decompose that goal into executable substeps.
|
||||
|
||||
Lovart's core difference is its agent mechanism. Between user input and the image generation model, it adds a logic layer for understanding and planning: first identify user intent, then decompose tasks and rewrite prompts, and only then execute generation.
|
||||
|
||||
### 2.2 Practical Demo: Build a Full IP Sticker Pack in 5 Minutes
|
||||
|
||||
Take **"create an IP sticker pack of a programmer duck"** as an example and look at how the agent participates in the full workflow.
|
||||
|
||||
#### Step 1: Planning (Agent Thinking Capability)
|
||||
|
||||
**Native API issue:**
|
||||
You need to think through character settings and emotional states yourself, and write separate prompts for every image.
|
||||
|
||||
**Lovart approach:**
|
||||
|
||||
1. Turn on 💡 **Thinking Mode**
|
||||
2. Input one instruction:
|
||||
|
||||
> 设计一套程序员鸭子的 IP 表情包,风格要扁平化、可爱
|
||||
|
||||
AI does not draw immediately. It first searches online for relevant programmer-duck references, then outputs a decomposed plan, automatically creates scenarios such as Debug, Coffee Break, Panic, and generates multiple visual descriptions.
|
||||
|
||||

|
||||
|
||||
At this step, AI shifts from "executor" to "planner." After AI analyzes the requirement, you can see programmer-duck images with multiple styles and contents on the Lovart canvas and start selecting your preferred style.
|
||||
|
||||

|
||||
|
||||
#### Step 2: Consistency (Reference-Based Visual Anchoring)
|
||||
|
||||
In Lovart, images are not only outputs. They are also inputs for follow-up generation.
|
||||
|
||||
##### Full Reference Image
|
||||
|
||||
* Choose your favorite "standard duck" from drafts and click the image on the canvas
|
||||
* The image automatically appears in the dialogue area as a reference
|
||||
|
||||

|
||||
|
||||
* Input a new action (such as happy) and generate
|
||||
|
||||
The generated result will inherit color palette, proportions, and detail characteristics from the master reference.
|
||||
|
||||

|
||||
|
||||
##### Local Reference / Multi-Image Composition
|
||||
|
||||
Besides using full images as references, Lovart also supports:
|
||||
|
||||
* **selecting only local regions** (for example, only reference a hat or expression)
|
||||
|
||||
Click the left tab on the canvas, choose "Mark," and annotate the local region in the target image. That part is automatically synced into the dialogue box. For example, we can change only the background color here.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
You can see the newly generated image only changes the background color, which matches our requirement.
|
||||
|
||||
* **referencing sub-elements from multiple images** and combining them into a new result
|
||||
|
||||
For example: you can keep the main character from image A, while replacing only the hat with the style from image B. The agent automatically merges these visual constraints in the background.
|
||||
|
||||
Using programmer ducks as an example, we can keep the duck from the first image and replace the subject element in the second image.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
The final result also looks great. You can try other combinations too.
|
||||
|
||||
#### Step 3: Delivery (Agent Tool Calling)
|
||||
|
||||
After generation, you can directly execute operations such as upscale, background removal, and erasing.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
These are not simple filters. They are results from the agent orchestrating different tools automatically.
|
||||
|
||||
After style direction is confirmed, you can quickly generate a full set of sticker images.
|
||||
|
||||

|
||||
|
||||
What we finally get is production-ready assets that can be delivered directly, not just one showcase image.
|
||||
|
||||
### 2.3 Usage and Pricing Notes
|
||||
|
||||
Lovart uses a subscription model. Different plans correspond to different usage quotas and feature permissions. Refer to the official site for specific details.
|
||||
|
||||
This tutorial does not recommend or compare any specific plan. If you need it in actual use, choose paid upgrades based on your own situation.
|
||||
Currently, payment methods include **Alipay** and others.
|
||||
|
||||

|
||||
|
||||
#### Summary
|
||||
|
||||
Lovart does not replace underlying models. Instead, through an agent mechanism, it upgrades image generation from "single execution" to a "continuous workflow."
|
||||
|
||||
When tasks involve planning, consistency, and delivery, the advantage of this type of tool becomes very clear.
|
||||
|
||||
## Chapter 3: Build an Intelligent Drawing Assistant by Yourself
|
||||
|
||||
Besides using Lovart directly, we can also implement a simplified drawing assistant ourselves.
|
||||
|
||||
In this chapter, we use "automatic illustration for articles" as an example. Starting from a real problem, we build a minimal practical agent with a thinking layer step by step.
|
||||
|
||||
### 3.1 Pain Point: Why Sending Long Articles Directly to an Image Model Does Not Work
|
||||
|
||||
If you directly send a long article to NanoBanana and ask for illustration, the result is usually not ideal. The issue is not that the model "cannot draw." The issue is that **it is not good at understanding long text**.
|
||||
|
||||
Image generation models are better at short and clear visual descriptions. But when the input becomes an article with structure, key points, and contextual relationships, the model cannot determine which parts should be represented visually. This often causes off-topic images, or results that capture only scattered details without overall summarization.
|
||||
|
||||
In essence, image models have "execution" capability but lack an analysis-and-selection process for long text.
|
||||
|
||||

|
||||
|
||||
### 3.2 Solution: Use an Agent to Split "Understanding" and "Execution"
|
||||
|
||||
To solve this, the key is not a more complicated prompt. The key is **to think clearly before drawing**. So we introduce an independent "thinking layer" into the generation flow, and use it to build the simplest practical agent.
|
||||
|
||||
This agent has only one core objective: **make the final generated image match the user's true intent as closely as possible.**
|
||||
|
||||
The full flow can be summarized as:
|
||||
**long-text input -> language-model understanding and intent judgment -> generation of suitable visual prompt -> image-model execution -> output image**
|
||||
|
||||

|
||||
|
||||
How can our agent understand user intent?
|
||||
|
||||
Here we use a simplified **thinking layer** with three intents: invalid input, direct drawing instruction, and long text that needs understanding.
|
||||
|
||||
In this agent, role division can be summarized in four points:
|
||||
|
||||
1. **Language model as decision core**
|
||||
It understands article content, judges user intent, routes tasks to suitable generation paths, and decides "what to do next" and how to generate visual prompts.
|
||||
2. **Image model as executor**
|
||||
The image model does not do understanding or intent judgment. It only receives prepared visual instructions and focuses on rendering.
|
||||
3. **User as interactive guide**
|
||||
Besides entering text directly, users can manually adjust generated prompts or add reference images to guide and fine-tune final results.
|
||||
4. **Gradio and backend APIs as application carrier**
|
||||
They connect UI, model invocation, and result display to ensure the full agent can run stably as a complete web app.
|
||||
|
||||

|
||||
|
||||
### 3.3 Practical Preparation: Obtain APIs
|
||||
|
||||
Looks fun, right? To run the full flow above, we only need two types of APIs.
|
||||
|
||||
#### Hand: NanoBanana API (Image Generation)
|
||||
|
||||
Directly reuse the API Key and API URL already configured in Chapter 1. No additional setup is required.
|
||||
|
||||
#### Brain: SiliconFlow API (Text Thinking)
|
||||
|
||||
We need a large language model to handle the "thinking layer." This tutorial uses model services provided by SiliconFlow:
|
||||
[https://cloud.siliconflow.cn](https://cloud.siliconflow.cn/)
|
||||
|
||||

|
||||
|
||||
SiliconFlow provides interfaces compatible with OpenAI API conventions, so it can be called conveniently via standard network requests. Here we use the free `Qwen2.5-7B-Instruct` model. Everything needed for invocation is already included in the prompt below. Before you start, you only need to register an account and create an API Key on the official site.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
This key will be used for later model calls.
|
||||
|
||||
### 3.4 Build the Agent
|
||||
|
||||
In this experiment we mainly use Trae to help write code. The tutorial uses `Gemini-3-Pro-Preview`. The overall approach is: create a new project, copy the full prompt below into the dialogue box, replace API keys step by step, run code, and complete testing.
|
||||
|
||||

|
||||
|
||||
#### Step 1️⃣: Gradio Blocks Base Framework and UI Layout
|
||||
|
||||
In this step, our main goal is to build the "appearance" of the whole agent first and complete the front-end page design. Copy the prompt below into Trae. After implementation, you will get a local URL (usually `http://127.0.0.1:7860`) to view the interface and verify the result.
|
||||
|
||||
```Plain
|
||||
板块 1:Gradio Blocks 基础框架与界面布局
|
||||
1、任务目标
|
||||
·基于 Gradio 4.0.0+ 的 Blocks 布局,实现「LLM+Nanobanana 文生图」项目的基础界面,严格遵循固定左右分栏布局,初始化所有 UI 组件并设置正确的初始状态。
|
||||
|
||||
2、技术栈要求
|
||||
·必须使用 Gradio 4.0.0+ 的 Blocks 模式开发,禁止使用 Interface 模式;
|
||||
·依赖:gradio>=4.0.0,pillow>=10.0.0(仅导入,暂不实现图片处理逻辑);
|
||||
·代码需是完整可运行的 Python 文件,包含所有必要的导入语句。
|
||||
|
||||
3、界面布局规则(核心约束,融合实战细节)
|
||||
·整体布局:
|
||||
页面标题:LLM 驱动的文生图全流程工具;
|
||||
固定左右分栏:左侧占 60% 宽度,右侧占 40% 宽度,使用 gr.Row 和 gr.Column 实现比例控制。
|
||||
·左侧 60%(提示词生成流程区)组件清单:
|
||||
input_text:gr.Textbox,标签「输入文本(教程段落 / 绘图指令)」,lines=6,占位符「请输入需要配图的教程文本或直接绘图指令...」;
|
||||
identify_intent_btn:gr.Button,value="识别意图",初始状态正常可点击;
|
||||
intent_status:gr.Textbox,标签「意图类型 / 处理状态」,lines=2,interactive=False,初始值「未识别意图」;
|
||||
system_prompt:gr.Textbox,标签「System Prompt(仅文章配图意图可编辑)」,lines=4,interactive=False,占位符「LLM 生成提示词的约束规则...」;
|
||||
confirm_prompt_btn:gr.Button,value="确认生成生图提示词",interactive=False(初始禁用防误触);
|
||||
generation_prompt:gr.Textbox,标签「生图提示词(可编辑)」,lines=3,interactive=True,初始值为空,占位符「生成的英文生图提示词将显示在此,支持手动修改...」。
|
||||
·右侧 40%(Nanobanana 生图功能区)组件清单:
|
||||
ref_image:gr.Image,标签「参考图(可选,图生图)」,type=filepath,height=300,允许上传;
|
||||
generate_btn:gr.Button,value="生成图片",interactive=False(初始禁用,无提示词不可点击);
|
||||
result_image:gr.Image,标签「生成结果」,type=pil,height=300,初始为空,interactive=False。
|
||||
|
||||
4、交互逻辑要求
|
||||
·所有组件的 interactive 初始状态严格按上述配置,后续通过函数动态更新;
|
||||
·按钮禁用状态需直观(置灰),避免用户误操作。
|
||||
|
||||
5、输出要求
|
||||
·生成完整的 Python 代码,仅实现界面布局和组件初始化,不包含任何业务逻辑;
|
||||
·代码注释清晰,组件命名与实战版一致(input_text/identify_intent_btn 等);
|
||||
·代码可直接运行,界面结构与描述完全一致。
|
||||
```
|
||||
|
||||
After opening `http://127.0.0.1:7860` in the browser, you can see that Trae generated the page according to the requirements. The layout largely matches the specification, so we can move on to the next step.
|
||||
|
||||

|
||||
|
||||
#### Step 2️⃣: LLM Intent Recognition Module (SiliconFlow API)
|
||||
|
||||
When using VLMs for drawing in daily work, there are usually three common input cases:
|
||||
|
||||
1. Meaningless content, such as "hello" or "have you eaten today," which cannot map to drawable requirements.
|
||||
2. Articles/long text, such as a structured paragraph around 200 words, where you must first understand structure/content before generating an image that summarizes the text.
|
||||
3. Direct drawing instructions, such as "draw a dog taking a bath," where requirements are already specific enough for immediate generation.
|
||||
|
||||
As before, copy the prompt below into Trae and fill in the API key obtained in the earlier steps.
|
||||
|
||||
```Plain
|
||||
板块 2:LLM 意图识别模块(Siliconflow API)
|
||||
1、任务目标
|
||||
在已实现的 Gradio 界面基础上,为「识别意图」按钮添加点击逻辑,调用 Siliconflow API 完成意图识别,并联动组件状态。
|
||||
|
||||
2、技术栈要求
|
||||
基于 Gradio 4.0.0+ Blocks;
|
||||
依赖:requests>=2.31.0,openai;
|
||||
输出完整可运行 Python 文件,包含板块 1 界面 + 本模块逻辑。
|
||||
|
||||
3、核心业务规则(绝对不可偏离)
|
||||
·意图分类规则(仅 3 类,严格返回数字 + 描述)
|
||||
1 = 无意义内容:仅闲聊、寒暄、无关对话,没有任何绘图或配图需求(如 “你好”“今天吃了吗”);
|
||||
2 = 文章 / 长文本配图需求:用户输入一段完整文章、教程、段落、说明性文字,内容偏叙事 / 说明 / 讲解,隐含需要为这段内容生成配图的意图,不需要用户明确说 “为这段文字配图”;
|
||||
3 = 直接绘图指令:用户输入简短、明确的画图命令,没有长文本背景,直接要求画某个内容(如 “画一只 Apple 风格的猫”)。
|
||||
·LLM 调用约束(融合实战版模板)
|
||||
接口地址:https://api.siliconflow.cn/v1/chat/completions;
|
||||
模型:Qwen/Qwen2.5-7B-Instruct;
|
||||
temperature=0.1;
|
||||
统一定义代码:
|
||||
python
|
||||
运行
|
||||
LLM_BASE_URL = "https://api.siliconflow.cn/v1"
|
||||
LLM_API_KEY = "" # 用户自行替换
|
||||
LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# 实战验证的意图识别模板(固化到代码中)
|
||||
INTENT_PROMPT_TEMPLATE = """你需要识别用户输入文本的意图,仅返回以下 3 类结果中的一种(格式:数字 + 中文描述):
|
||||
1 = 无意义内容;2 = 文章 / 长文本配图需求;3 = 直接绘图指令。
|
||||
|
||||
用户输入:{user_input}
|
||||
|
||||
识别结果:
|
||||
仅提取返回结果中的数字和描述,禁止额外内容。"""
|
||||
|
||||
4、组件联动规则
|
||||
·结果为 1:intent_status 显示「1 = 无意义内容:无绘图需求」,system_prompt 保持禁用,confirm_prompt_btn 禁用;
|
||||
·结果为 2:intent_status 显示「2 = 文章 / 长文本配图需求:为输入内容生成配图」,启用 system_prompt 并填充默认规则,激活 confirm_prompt_btn;
|
||||
·结果为 3:intent_status 显示「3 = 直接绘图指令:根据指令生成图片」,system_prompt 禁用且填充默认规则,激活 confirm_prompt_btn。
|
||||
|
||||
5、异常处理
|
||||
API 异常、解析异常均给出友好提示,不崩溃,组件恢复初始状态。
|
||||
|
||||
6、输出要求
|
||||
生成完整可运行代码,替换 LLM_API_KEY 即可使用,逻辑清晰注释完整,意图识别模板严格使用实战版。
|
||||
```
|
||||
|
||||
Refresh `http://127.0.0.1:7860` and test whether it correctly detects all three cases.
|
||||
|
||||
1. Meaningless content: try inputting "你好", "谢谢", and so on. It should be recognized correctly.
|
||||
|
||||

|
||||
|
||||
2. Article/long text: here we use a paragraph about AI generated by Doubao. You can also test with a paragraph from your own paper.
|
||||
|
||||
```Plain
|
||||
人工智能正在以前所未有的深度和广度重塑教育生态系统。通过自适应学习算法,AI系统能够构建每个学生的认知图谱,实时追踪他们的知识掌握轨迹,并动态调整教学内容的难度和呈现方式。在传统课堂环境中,教师往往难以同时满足不同学习风格和能力水平的学生需求,而基于深度学习的教育平台可以分析学生在交互式模拟实验中的行为模式,识别他们在量子力学或微积分等复杂概念理解上的微妙障碍,并提供精准的认知支架。
|
||||
|
||||
高级自然语言处理引擎驱动的虚拟导师不仅能够解构开放性问题,如"如何评价法国大革命对现代民主制度的影响",还能引导苏格拉底式对话,激发批判性思维。当学生撰写关于气候变化对极地生态系统影响的论文时,AI写作助手可以分析其论证逻辑的严密性,指出数据引用中的时效性问题,并建议更精准的科学术语。在特殊教育领域,计算机视觉技术使AI能够识别自闭症谱系儿童在社交互动中的非语言线索,调整干预策略,而情感计算算法则帮助检测在线学习时的挫折感,及时提供鼓励性反馈。
|
||||
|
||||
然而,这种技术融合引发了一系列伦理困境。算法偏见可能无意中边缘化特定文化背景的学生,数据采集的透明度问题引发了对学术隐私的关切,而过度依赖自动化评分系统可能削弱教师对学生思维过程的深层理解。更复杂的是,当AI开始生成高度逼真的虚拟实验室体验时,我们需要重新定义"实践经验"在教育中的价值。未来教育的范式可能演变为人类教师专注于培养创造力、同理心和道德判断力,而AI系统则承担知识传递、技能训练和个性化评估的职能,形成一种协同进化的教育共生体,既能发挥机器的计算优势,又能保留人类教育的独特温度.
|
||||
```
|
||||
|
||||
This is also detected successfully.
|
||||
|
||||

|
||||
|
||||
3. Direct drawing instruction: here we input "我要画一只猫", and it is also correctly detected.
|
||||
|
||||

|
||||
|
||||
At this point, we have successfully completed step 2: intent recognition.
|
||||
|
||||
#### Step 3️⃣: Prompt Generation Module (Second LLM Call)
|
||||
|
||||
After intent recognition, for articles or long text there is one more crucial step: generating the drawing prompt. This is exactly the core of this agent.
|
||||
|
||||
```Plain
|
||||
板块 3:生图提示词生成模块(LLM 二次调用)
|
||||
1、任务目标
|
||||
在意图识别基础上,实现「确认生成生图提示词」按钮逻辑,调用 LLM 将文本优化为适合绘图的英文视觉提示词,填充到编辑框并联动「生成图片」按钮。
|
||||
|
||||
2、技术栈要求
|
||||
同板块 2,输出完整代码 = 板块 1 + 板块 2 + 本模块;
|
||||
共用板块 2 定义的 LLM_BASE_URL、LLM_API_KEY、LLM_MODEL,不新增密钥。
|
||||
|
||||
3、核心业务规则(融合实战版 Prompt 组装逻辑)
|
||||
·提示词生成输入规则(必须严格遵循)
|
||||
生图提示词生成不再是简单字符串拼接,而是构建标准 Chat 消息列表,代码结构如下:
|
||||
python
|
||||
运行
|
||||
messages = [
    # System角色:网页上用户最终确认/编辑后的system_prompt内容
    {"role": "system", "content": final_system_prompt},
    # User角色:承载待处理数据,明确任务目标
    {"role": "user", "content": f"请为以下内容生成视觉提示词:\n\n{user_input}"}
]
|
||||
意图为 2 时:System 内容取用户编辑后的 system_prompt 最终版本;
|
||||
意图为 3 时:System 内容取禁用状态下填充的默认规则;
|
||||
user_input 为用户最初输入到 input_text 框的原始文本。
|
||||
·实战验证的 System Prompt 预设(固化到代码中)
|
||||
python
|
||||
运行
|
||||
SYSTEM_PROMPT_DEFAULT = """你现在是一个创建NanoBanana画图提示词的助手。
|
||||
需要根据我的内容处理,我这个图片的作用是能说明这一段在说什么,并且让大家知道这段话的上下结构就是整体说的是什么意思。
|
||||
里面可能会类似PPT有一些讲解(如:左上角展示核心观点,右下角展示数据)。
|
||||
设计风格要求:简约,Apple设计思维(Apple Design Philosophy)。
|
||||
约束:请直接返回NanoBanana可用的英文提示词,不要返回任何解释、前缀或多余的废话。"""
|
||||
·LLM 调用约束
|
||||
与板块 2 共用同一套 LLM_BASE_URL、LLM_API_KEY、LLM_MODEL;
|
||||
temperature=0.7(保证提示词的创意性与适配性);
|
||||
max_tokens=200(限制输出长度,匹配提示词约束);
|
||||
严格使用上述标准 Chat 消息列表结构,禁止字符串拼接。
|
||||
·示例输入输出(核心参考)
|
||||
输入示例 1(文章配图意图):原始文本:「AI 如何改变教育:随着人工智能技术的发展,教师的角色从知识传授者转变为引导者,AI 助手可辅助学生完成个性化学习,课堂上人机协作成为常态。」最终 System Prompt:SYSTEM_PROMPT_DEFAULT(未修改)输出预期:"Minimalist illustration, Apple Design Philosophy, 1024x1024. Top left shows 'AI + Education' core concept, bottom right shows data of teacher-student-AI collaboration, soft color palette, clean lines, no redundant elements."
|
||||
输入示例 2(直接绘图指令):原始文本:「画一只 Apple 风格的猫,坐在 MacBook 旁边」最终 System Prompt:SYSTEM_PROMPT_DEFAULT(禁用状态)输出预期:"Minimalist cat, Apple style, 1024x1024, sitting next to a silver MacBook, clean white background, soft shadows, geometric shapes, no extra details."
|
||||
·提示词输出强制约束
|
||||
纯英文,无中文;
|
||||
必须包含 Apple Design Philosophy/Apple style + 1024x1024;
|
||||
长度 50–200 字符,代码内校验;
|
||||
无额外解释、前缀或废话,仅返回提示词本身。
|
||||
|
||||
4、组件联动规则
|
||||
生成成功:将提示词填入 generation_prompt 框,激活 generate_btn,intent_status 追加「提示词生成成功,可修改后生成图片」;
|
||||
生成失败:提示具体原因(如 API 调用失败、长度不达标),generate_btn 保持禁用,generation_prompt 框为空;
|
||||
用户手动修改 / 清空 generation_prompt 框:
|
||||
清空时自动禁用 generate_btn;
|
||||
非空时保持 generate_btn 激活。
|
||||
|
||||
5、异常处理
|
||||
API 调用失败:友好提示「提示词生成失败:{具体错误信息}」,不崩溃;
|
||||
提示词校验失败:明确提示原因(如 “未包含 Apple style”“长度仅 40 字符”),允许重试;
|
||||
响应解析失败:提示「无法解析 LLM 返回结果,请重试」。
|
||||
|
||||
6、输出要求
|
||||
完整可运行代码,替换 LLM_API_KEY 即可使用;
|
||||
代码结构清晰、注释完善,界面美观简洁;
|
||||
严格实现标准 Chat 消息列表结构,参数与示例逻辑一致;
|
||||
包含提示词长度、内容校验逻辑,错误提示友好。
|
||||
```
|
||||
|
||||
Use the same long text from step 2 for testing.
|
||||
|
||||
It is worth noting that the default System Prompt we preset for prompt generation is:
|
||||
|
||||
> 你现在是一个创建NanoBanana画图提示词的助手。
|
||||
> 需要根据我的内容处理,我这个图片的作用是能说明这一段在说什么,并且让大家知道这段话的上下结构就是整体说的是什么意思。
|
||||
> 里面可能会类似PPT有一些讲解(如:左上角展示核心观点,右下角展示数据)。
|
||||
> 设计风格要求:简约,Apple设计思维(Apple Design Philosophy)。
|
||||
> 约束:请直接返回NanoBanana可用的英文提示词,不要返回任何解释、前缀或多余的废话。
|
||||
|
||||
If you want to switch to other preset templates, you can modify the earlier prompt or directly modify it through Trae dialogue.
|
||||
|
||||

|
||||
|
||||
Besides changing underlying code, we can also edit quickly on the webpage. For example, I added one line, "add 'Pic Prompt' at the beginning." You can see the new generated prompt also starts with it. This design is for quickly adjusting the system prompt for generation, so we can switch styles fast.
|
||||
|
||||

|
||||
|
||||
#### Step 4️⃣: NanoBanana Text-to-Image / Image-to-Image Module
|
||||
|
||||
Finally we are at the last step. Without connecting an image model, it is not a complete agent.
|
||||
|
||||
```Plain
|
||||
板块 4:Nanobanana 文生图 / 图生图模块(最终版)
|
||||
1、任务目标
|
||||
实现「生成图片」按钮逻辑,调用真实 Nanobanana API,支持文生图 / 图生图,解析 Base64 并展示图片。
|
||||
|
||||
2、技术栈要求
|
||||
基于 Gradio 4.0.0+ Blocks;
|
||||
依赖:requests, pillow, base64, io, re;
|
||||
完整代码 = 板块 1+2+3 + 本模块。
|
||||
|
||||
3、核心 API 配置(实战验证固化)
|
||||
固化代码配置:
|
||||
python
|
||||
运行
|
||||
# 固化到代码中的API配置
|
||||
NANOBANANA_API_URL = "https://api.zyai.online/v1/chat/completions"
|
||||
NANOBANANA_MODEL = "gemini-2.5-flash-image"
|
||||
NANOBANANA_API_KEY = "" # 用户自行替换
|
||||
鉴权方式:Header Authorization: Bearer {NANOBANANA_API_KEY}。
|
||||
|
||||
4、图片预处理要求(必须实现)实现函数 image_to_base64_data_uri (ref_image_path),核心逻辑:
|
||||
将 PIL 图片转为 PNG 格式;
|
||||
自动缩放到 1024x1024 分辨率;
|
||||
透明通道转为白色背景;
|
||||
编码为 Base64,返回格式:data:image/png;base64,...。
|
||||
|
||||
5、请求构建规则(严格按实战版分支逻辑)
|
||||
·核心函数定义实现函数 generate_image (prompt, ref_image_path):
|
||||
入参:prompt(generation_prompt 框内容)、ref_image_path(ref_image 上传的文件路径);
|
||||
返回:PIL Image(展示到 result_image)或错误提示。
|
||||
·逻辑分支 1:纯文生图(ref_image_path 为空)
|
||||
python
|
||||
运行
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
·逻辑分支 2:图生图(ref_image_path 有值)
|
||||
python
|
||||
运行
|
||||
# 先调用图片预处理函数
|
||||
image_base64 = image_to_base64_data_uri(ref_image_path)
|
||||
messages = [{"role": "user","content": [{"type": "text", "text": prompt},{"type": "image_url", "image_url": {"url": image_base64}}]}]
|
||||
|
||||
6、响应解析要求(必须兼容两种格式)从 choices [0].message.content 中提取图片 Base64,支持:
|
||||
结构化 JSON 返回的 image_url 字段;
|
||||
Markdown 图片格式,例如 ![image](data:image/png;base64,...)
|
||||
;
|
||||
统一提取 Base64 编码,解码后转换为 PIL Image 返回。
|
||||
|
||||
7、组件联动与异常处理
|
||||
生成成功:将 PIL Image 展示到 result_image,intent_status 提示「图片生成成功」;
|
||||
生成 / 解析 / 上传失败:在 intent_status 显示清晰文字提示(如 “Base64 解析失败”“API 调用超时”),不崩溃。
|
||||
|
||||
8、输出要求
|
||||
完整可运行代码,替换 LLM_API_KEY 和 NANOBANANA_API_KEY 即可直接运行,全流程可用,分支逻辑严格匹配实战版。
|
||||
```
|
||||
|
||||

|
||||
|
||||
So exciting. We finally generated the first image of this agent. Looking closely, the generated image matches both our text and prompt. At this point, you have basically implemented your own agent.
|
||||
|
||||

|
||||
|
||||
We also added image-to-image. Upload an image you like, and AI will automatically borrow style cues.
|
||||
|
||||

|
||||
|
||||
It is also worth mentioning that prompts generated in earlier steps can be edited directly on the webpage, and generation always uses the final prompt at click time. Even if I change it here to "a cute cat," the final output will be just a cute kitten.
|
||||
|
||||
## Chapter 4: Summary
|
||||
|
||||

|
||||
|
||||
**Whew, finally finished.**
|
||||
Honestly, when I finished the last line, I let out a deep breath myself. And you have followed the whole path to get here. Running through this full workflow is already impressive in itself: it means you really put your hands on the keyboard and completed things step by step. Bravo.
|
||||
|
||||
During the writing of this tutorial, I kept asking what we really want to leave behind. The answer is not model names, parameter values, or fixed tricks. It is helping you gradually build a feel for division of labor: what AI can safely understand and plan for you, and where you only need to decide direction. Once this division is established, many workflows that once looked complex start becoming smooth.
|
||||
|
||||
Looking back, this path is not actually complicated. Clarify the problem you want to solve, let a language model decompose long text, then pass organized visual intent to an image model for rendering, and finally package the full process into your own assistant. At that point, you are no longer simply "using models." You are building a system that can work with you over the long term. That is exactly what this tutorial most wants to deliver.
|
||||
|
||||
You have already done a great job. If you have made it this far, you already have a solid initial grasp of Vibe Coding. Give yourself a short break.
|
||||
|
||||
<RelatedArticlesSection
|
||||
title="Related Articles"
|
||||
description='If you want to truly connect "asset generation" into product workflows, continue with these chapters.'
|
||||
:items="relatedArticles"
|
||||
/>
|
||||
@@ -0,0 +1,307 @@
|
||||
# Figma and MasterGo Basics
|
||||
|
||||
::: tip Core Question
|
||||
**How do you start using modern design tools from scratch to build web prototypes?**
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## 1. Why learn frontend design tools?
|
||||
|
||||
Before we begin, we need to answer a simple question: why bother learning frontend design tools at all? If you can already build pages with HTML and CSS, is it really necessary to learn one more tool?
|
||||
|
||||
In practice, "making a page run" and "designing a good product" are two different things. Code focuses on how something renders in the browser and how it behaves across devices. Design tools focus on how information is arranged, how interactions are sequenced, and how visual priority is communicated. With a single canvas, you can compare layout, information hierarchy, and interaction patterns on one screen before writing code.
|
||||
|
||||
If you jump straight into implementation or ask AI to generate a full frontend page immediately, the user experience is often rough. Serious products think carefully about comfort, hierarchy, and communication across different screens. A better workflow is to arrange the interface first from the user's perspective, then convert or generate the code.
|
||||
|
||||
From a collaboration standpoint, design tools also reduce coordination cost. Designers, product managers, and developers no longer need to imagine the same screen from vague explanations or abstract code. Everyone can discuss versioning, requirement changes, and feedback around a visible, annotatable, iterative canvas. Modern design tools are no longer just drawing software either. They can generate part of the code, manage design systems and component libraries, and automate repetitive work such as alignment, annotation, exporting, and style changes.
|
||||
|
||||

|
||||
|
||||
### 1.1 The evolution of frontend design tools
|
||||
|
||||
Frontend design tools are the result of a long evolution. In the 1990s, Photoshop dominated with local bitmap editing. Around 2010, Sketch introduced vector-first, component-oriented workflows. After 2016, Figma pushed collaboration into the cloud and turned solo design work into real-time teamwork. By 2025, AI had become a practical part of these tools, from "generate a draft from one sentence" to "turn a design into runnable frontend structure." "Design as code" and "human-AI co-creation" are no longer just slogans.
|
||||
|
||||
In this chapter, we will focus on two representative modern design tools: Figma and MasterGo. They both cover the core abilities needed for modern UI and UX work, including vector editing, component systems, auto layout, and developer handoff. They have also both added practical AI features that help turn a prototype into a runnable interface without changing the overall design intent.
|
||||
|
||||
### 1.2 How this toolchain emerged
|
||||
|
||||

|
||||
|
||||
Before dedicated interface tools existed, UI design was largely handled by "general-purpose" design tools such as Photoshop. Designers built entire interfaces locally using layered PSD files, then handed those heavy source files to frontend engineers. To recreate the design accurately, frontend engineers had to do three tedious but essential jobs manually.
|
||||
|
||||
The first was **asset slicing**: extracting buttons, icons, logos, backgrounds, and other visual elements one by one from a PSD file, then exporting them as PNG or JPG files the web could actually load.
|
||||
|
||||

|
||||
|
||||
The second was **measuring dimensions**: manually checking widths, heights, and spacing between elements to ensure everything matched the design pixel by pixel.
|
||||
|
||||

|
||||
|
||||
The third was **reading annotations by hand**: pulling out the "invisible but required" design parameters such as font size, font weight, line height, RGB or HEX colors, shadows, and so on.
|
||||
|
||||

|
||||
|
||||
Only after that did actual frontend implementation begin. Whether the stack is plain HTML/CSS/JS or frameworks like Vue and React, the core process is similar. The frontend rebuilds the page around containers, based on the hierarchy and semantics of the design. A container is a layout boundary that organizes child elements without directly being the final content itself. Structural blocks such as top navbars, sidebars, article lists, and footers rely on containers; inside each block, smaller containers arrange finer elements such as titles, descriptions, timestamps, or thumbnails.
|
||||
|
||||

|
||||
|
||||
In modern frontend frameworks, these structural blocks are typically implemented as **components**. A component is a reusable interface unit with clear boundaries. It includes both layout containers and interaction logic. Any repeated piece of design, such as a consistent button style or a reusable article card, can be abstracted into a component so it can be reused across different pages while keeping layout and styling consistent.
|
||||
|
||||
The styling layer then restores the visual appearance. Exported image assets become `<img>` tags or background images. Measured dimensions become CSS properties such as `width`, `height`, `margin`, `padding`, and `line-height`. Typography, color, shadow, border radius, and hover or active states become CSS, CSS Modules, CSS-in-JS, or Tailwind rules. At this point, exported assets and annotations provide the visual parameters, while components and structural blocks provide the code organization that makes the interface maintainable and reusable.
|
||||
|
||||

|
||||
|
||||
But the local-file workflow was fundamentally inefficient. Versions were sent through email or cloud drives, old and new drafts were easy to confuse, and collaboration required a lot of manual coordination.
|
||||
|
||||
As mobile interfaces became more complex and iteration speed increased, Photoshop's "do everything" model became too heavy. Sketch appeared in this phase. It focused on UI work itself, introduced Symbols for highly reusable elements such as buttons and form controls, and paired well with tools like Zeplin for automatic annotations and style snippets. Sketch brought component thinking into design workflows. Still, it remained a desktop tool built around local files, so real-time collaboration never became native.
|
||||
|
||||

|
||||
|
||||
Figma truly changed the game. Starting in 2016, it unified UI design, prototyping, comments, and version history in the browser, with multi-user cursors, online comments, timeline history, and shareable links.
|
||||
|
||||

|
||||
|
||||
From that point on, interface design was no longer scattered across separate machines. It became a shared online canvas that updated in real time. Once that happened, the boundary between design and frontend code became easier to blur through automation and AI.
|
||||
|
||||
At first, plugins could only semi-automatically export components and style information into code snippets such as React or Vue skeletons and CSS variables. Later, design platforms began to support MCP, the Model Context Protocol, which gives language models a standard, controlled way to access design files, plugin interfaces, and project metadata. That makes exporting designs into code much more direct.
|
||||
|
||||
The next step after plugins and MCP is native design-to-code generation. Today, some tools can generate project skeletons, component hierarchies, style systems, and real code directly from a design. That frees designers and frontend engineers from manually transferring details and gives them more time to focus on user experience and feature iteration.
|
||||
|
||||
---
|
||||
|
||||
## 2. Figma basics
|
||||
|
||||
Now let's move from concepts to hands-on work. To keep this chapter short, we will only cover Figma's core interaction model. The goal is simple: even if you have never used a design tool before, you should be able to follow along and complete the exercise. If you want a more complete walkthrough, you can study Figma's official beginner documentation:
|
||||
|
||||
https://help.figma.com/hc/en-us/sections/30880632542743-Figma-Design-for-beginners
|
||||
|
||||
You can also look at Figma's site-building examples:
|
||||
|
||||
https://help.figma.com/hc/en-us/sections/35895585621655-Figma-Sites-collection
|
||||
|
||||

|
||||
|
||||
On the left is project creation and resource management. In the top-right area, you will see several common entry points. `Make` lets AI generate a rough interface draft from one sentence. `Design` is the main workspace where you build app and web interfaces, components, and prototypes. `FigJam` works like a team whiteboard for notes, flows, and early discussions. `Buzz` is for brand-scale asset production. `Site` is for publishing designs as accessible websites or documentation pages.
|
||||
|
||||
At first glance, Figma looks complex. But tools like this become familiar through repetition. You do not need to be afraid of making mistakes, and you do not need to get everything right on the first try. The key is to start playing with it.
|
||||
|
||||
In this tutorial, we will focus on the `Design` workspace.
|
||||
|
||||
### 2.1 Create a new Design file
|
||||
|
||||
From the homepage or the top-right entry, choose **Design** to create a new file. You will enter a blank canvas.
|
||||
|
||||
This interface is roughly divided into four areas:
|
||||
|
||||
- The left side shows pages and layers so you can inspect the structure of the page and the hierarchy of elements.
|
||||
- The middle area is the canvas where you view and arrange the current design.
|
||||
- The right side is the properties panel where you change shape, color, and style details.
|
||||
- The toolbar lets you switch between selection, shapes, text, comments, and plugins. After selecting a tool, you can press `Esc` to return to the default pointer.
|
||||
|
||||

|
||||
|
||||
### 2.2 Create your first Frame
|
||||
|
||||
Before placing elements, we need a clear page boundary. In Figma, that boundary is handled by a Frame. You can select the Frame tool or press `F`, then drag out a rectangular region on the canvas.
|
||||
|
||||
1. Use the Frame tool in the toolbar or press `F`.
|
||||
2. Drag a rectangle on the canvas and set its width to something like `1440` and height to `900` in the right-side panel.
|
||||
3. Rename the Frame in the layer list to something like `My First Page` or your project name.
|
||||
|
||||
This Frame becomes the container for one complete screen. Your title, text, buttons, and images should all live inside it instead of floating freely on the canvas. Working inside a Frame helps later with scrolling, responsiveness, exporting, and prototyping.
|
||||
|
||||

|
||||
|
||||
### 2.3 Add text and basic elements inside the Frame
|
||||
|
||||
Now that we have a container, let's place the most basic interface elements: a title, subtitle, button, and placeholder image block.
|
||||
|
||||
1. Choose the text tool (`T`) and click inside the Frame to add a title such as `My Portfolio`. Increase the font size and weight in the right panel.
|
||||
2. Add one line of supporting text under the title. Use a smaller font size and slightly larger line height so it reads more comfortably.
|
||||
3. Sketch out a button:
|
||||
Use the rectangle tool to draw something around `200 x 48`, give it a noticeable fill color, and add some border radius.
|
||||

|
||||
4. Add button text on top, such as `Get Started`, then select both the rectangle and the text and align them horizontally and vertically.
|
||||
5. Add a larger light-gray rectangle beside or below the button as a placeholder image area.
|
||||
|
||||
At this point, you already have a very rough but structurally complete homepage draft: a title, a piece of body text, a button, and a main display area.
|
||||
|
||||

|
||||
|
||||
### 2.4 Use Auto Layout to organize elements
|
||||
|
||||
If all elements are positioned manually, the page becomes messy very quickly. One of Figma's most important concepts is **Auto Layout**, which turns a group of elements into a rule-based container.
|
||||
|
||||

|
||||
|
||||
Select the main title, subtitle, and button together, then click **Add Auto layout** in the right panel.
|
||||
|
||||
Those elements are now wrapped inside a container, and you can adjust several useful properties:
|
||||
|
||||
- Whether the elements are arranged vertically or horizontally
|
||||
- The spacing between elements
|
||||
- The padding between the content block and the edge of the container
|
||||
|
||||

|
||||
|
||||
You can use Auto Layout inside the button as well. That gives you a button whose width adjusts automatically when the text changes.
|
||||
|
||||
Select the button background and button text, add Auto Layout, and turn them into a button container. Then set both width and height to **Hug contents**. Once you do that, the text stays centered and the button width grows or shrinks with the text.
|
||||
|
||||

|
||||
|
||||
### 2.5 Turn the button into a reusable component
|
||||
|
||||
Now let's learn another important concept: components. A component is an element designed for repeated reuse. Buttons are a perfect example.
|
||||
|
||||
Starting from the button that already has Auto Layout:
|
||||
|
||||
1. Select the entire button container.
|
||||
2. Right-click and choose **Create component**.
|
||||

|
||||
|
||||
The button is now promoted from a set of ordinary layers to a component master. When you need the same button style somewhere else, you can drag it out from the Assets panel.
|
||||
|
||||

|
||||
|
||||
Every inserted button is now a synchronized instance of that master. If you later change the master's color, corner radius, or spacing, all instances update together.
|
||||
|
||||

|
||||
|
||||
At this point, you already understand the basic usage of Figma. You do not need to master every function on day one. Just build your first simple page, get comfortable with the core operations above, and explore more capabilities over time.
|
||||
|
||||
---
|
||||
|
||||
## 3. MasterGo basics
|
||||
|
||||
Once you understand the basic Figma workflow, MasterGo is much easier to approach. You can think of MasterGo as a China-focused counterpart to Figma with a few differences in product behavior. Overall, it follows a very similar layout and interaction model: canvas, layer tree, property panel, components, styles, auto layout, and multi-person collaboration. For more detail, you can refer to the official MasterGo tutorial:
|
||||
|
||||
https://mastergo.com/tutorials/12?%E5%85%A8%E7%A8%8B%E9%AB%98%E8%83%BD%EF%BC%8CMasterGo%20%E6%9C%80%E5%AE%8C%E6%95%B4%E5%AE%9E%E7%94%A8%E6%95%99%E7%A8%8B%EF%BC%8C%E8%AE%A9%E4%BD%A0%E4%BB%8E%E9%9B%B6%E5%88%B0%E7%B2%BE%E9%80%9A%EF%BC%81
|
||||
|
||||
### 3.1 Create a new design file
|
||||
|
||||
1. **Enter the MasterGo workspace**
|
||||
1. Open the MasterGo website and sign in.
|
||||
2. After entering, you will see a homepage similar to a file list or project list, where your design files are managed.
|
||||

|
||||
|
||||
2. **Create a new file**
|
||||
1. Click the `+ Design File` button in the top-right corner, or choose to import files such as Figma files.
|
||||
2. After clicking, you will enter a blank canvas, which is MasterGo's design workspace.
|
||||
|
||||
3. **Understand the major interface regions**
|
||||
Once you know Figma, MasterGo feels very similar. The main areas are:
|
||||
|
||||

|
||||
1. The top toolbar: file location and name on the left, common tool buttons in the middle, and online collaborators, sharing, zoom, and preview controls on the right.
|
||||
2. The left panel: layers and assets, including the page list and the structure of the current page.
|
||||
3. The central canvas: the workspace where Frames, components, and graphics are actually placed and arranged.
|
||||
4. The right properties panel: used to inspect and edit the selected object's size, position, alignment, fill, stroke, border radius, and more. If nothing is selected, it shows canvas-level settings.
|
||||
|
||||
### 3.2 Create your first Frame
|
||||
|
||||
Before placing content, we need a page container to define the boundary and size of the interface. In MasterGo, this is usually called a Frame.
|
||||
|
||||
**Steps**
|
||||
|
||||
1. **Choose the Frame tool**
|
||||
1. Find the Frame or Artboard tool in the toolbar.
|
||||
2. Or use the keyboard shortcut, usually `F` depending on the current UI.
|
||||
2. **Drag out a rectangular area on the canvas**
|
||||
1. Once you drag it out, you will see a selected region.
|
||||
2. The right properties panel will show its width and height.
|
||||
3. Change the width to something like `1440` and the height to `900`.
|
||||
3. **Rename the Frame**
|
||||
1. Find the Frame in the layer panel.
|
||||
2. Double-click the name and rename it to something like `My First Page`.
|
||||
|
||||

|
||||
|
||||
### 3.3 Build content on the artboard
|
||||
|
||||
Once you have a container, you can build a similar page using the same ideas we already used in Figma. You can even try copying text elements from the Figma artboard directly into MasterGo.
|
||||
|
||||

|
||||
|
||||
One thing worth noting is that Auto Layout behaves a little differently. In MasterGo, if you want button width to expand or shrink with the text, you first need to create a container or component around the rectangle element, as shown below:
|
||||
|
||||

|
||||
|
||||
After creating the container, put the button background and text into that shared container, then enable Auto Layout from the right-side panel. That lets the button width respond to the text length successfully.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
### 3.4 AI-generated pages
|
||||
|
||||

|
||||
|
||||
One especially interesting feature in MasterGo is AI page generation. You can enter a sentence or provide a reference image, and MasterGo can generate editable components and code for you. You can write the prompt in either Chinese or English. The system will return a clearly structured page draft based on your request.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
Once the design document is generated, click to start generation and wait briefly for the rendered result:
|
||||
|
||||

|
||||
|
||||
At this point, you have two options:
|
||||
|
||||
- Click the blue button to insert the generated result directly into the canvas
|
||||
- Open the code preview and get the code for the full current page
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
After inserting the result into the canvas, you can further refine the overall layout and element details such as typography, colors, and spacing until the final result matches your expectations.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 4. Next step: from prototype to code
|
||||
|
||||
In this chapter, you learned the basic operations of both Figma and MasterGo and created structurally complete interface prototypes. The next key question is:
|
||||
|
||||
**How do you convert these design drafts into frontend code that actually runs in the browser?**
|
||||
|
||||
::: tip Next Tutorial
|
||||
For the detailed workflow, continue with [From Design Prototype to Project Code](../2.6-design-to-code/). You will learn:
|
||||
|
||||
- **Direct multimodal AI conversion**: send screenshots of your design to AI and generate HTML or React code directly
|
||||
- **Figma Make**: use Figma's official AI tooling to recreate a design precisely and export code
|
||||
- **MasterGo AI**: generate editable pages and retrieve code in one step
|
||||
|
||||
Each method has strengths and trade-offs, so choose the workflow that fits your project.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## 5. Summary
|
||||
|
||||
After finishing this chapter, you should now understand:
|
||||
|
||||
1. **Why frontend design tools matter**: They solve problems around information layout and team collaboration, not just visual output.
|
||||
2. **Basic Figma operations**:
|
||||
- Creating Design files and Frame artboards
|
||||
- Adding text, shapes, and other basic elements
|
||||
- Using Auto Layout for adaptive layouts
|
||||
- Creating reusable component systems
|
||||
3. **Basic MasterGo operations**:
|
||||
- Understanding an interface layout similar to Figma
|
||||
- Creating Frames and basic artboard content
|
||||
- Using AI page generation to prototype faster
|
||||
|
||||
::: tip Next Step
|
||||
Now that you know the basics of modern frontend design tools, you can try:
|
||||
|
||||
- Designing a personal portfolio page for yourself
|
||||
- Designing prototypes for your next project
|
||||
- Continuing to [From Design Prototype to Project Code](../2.6-design-to-code/) to turn designs into runnable code
|
||||
|
||||
If you are working through the [Let's Build Hogwarts Portraits](../2.5-hogwarts-portraits/) project, you can start by designing the interface prototype, then export code and combine it with AI conversation features.
|
||||
:::
|
||||
@@ -0,0 +1,3 @@
|
||||
# Build Your First Modern Application - UI Design
|
||||
|
||||
> This chapter is currently being written. Stay tuned...
|
||||
@@ -0,0 +1,3 @@
|
||||
# Reference UI Design Specifications and Multi-Product UI Design
|
||||
|
||||
> This chapter is currently being written. Stay tuned...
|
||||
@@ -0,0 +1,515 @@
|
||||
# Make Interfaces Beautiful with LLMs and Skills: Prompts and Plugin Workflows
|
||||
|
||||
In the previous chapters, you already learned how to turn designs into code with AI IDEs and how to use component libraries to build interfaces quickly. But you may also have noticed an awkward problem: **even with the same requirement, AI-generated pages often feel a bit generic**. The font is always Inter, the color palette is some overused purple gradient, the layout is a perfectly symmetrical card grid, and the page gives off a strong "AI-generated" feeling.
|
||||
|
||||
This is not really AI's fault. The real issue is that you never told it what kind of **style** you wanted.
|
||||
|
||||
Imagine going to a hair salon. If you only say, "Give me a haircut," the stylist will probably choose something safe but forgettable. But if you say, "I want a soft Japanese-style layered wave, curtain bangs, shoulder length, and strong texture," you are much more likely to get exactly what you want.
|
||||
|
||||
The same is true for AI. **It needs a clear aesthetic direction** before it can generate a beautiful and distinctive interface.
|
||||
|
||||
This chapter introduces two practical ways to make AI-generated interfaces look much better:
|
||||
|
||||
1. **Well-designed prompt templates** so you can describe the exact aesthetic you want
|
||||
2. **Frontend Skills plugins** so AI automatically loads reusable design rules
|
||||
|
||||
## What you will learn
|
||||
|
||||
1. Why AI-generated interfaces often look "ordinary" by default
|
||||
2. How to describe a design style through 5 dimensions: typography, color, layout, motion, and details
|
||||
3. How to use 3 helpful Skills plugins for UI beautification
|
||||
4. How to generate better-looking interfaces through prompts + Skills across three practical scenarios
|
||||
|
||||
## 1. Why do AI-generated interfaces look "ordinary" by default?
|
||||
|
||||
AI was trained on massive amounts of frontend code, and most of that code uses safe, highly repeated choices:
|
||||
|
||||
| Dimension | AI's default choice | Problem |
|
||||
| :--- | :--- | :--- |
|
||||
| Typography | Inter, Roboto, Arial | Too common, no personality |
|
||||
| Color | Purple gradients, blue primary colors | Overused in the tech world, visually tiring |
|
||||
| Layout | Symmetrical grids, stacked cards | Predictable, not memorable |
|
||||
| Motion | Fade-ins, simple hover effects | Not refined enough, lacks depth |
|
||||
| Background | Solid colors, simple gradients | Flat and low-texture |
|
||||
|
||||
Each of these choices is fine on its own. But **once every AI-generated page uses all of them, they start to feel generic and interchangeable**.
|
||||
|
||||
> 💡 **Key insight**: AI can design, but by default it gravitates toward the **statistical average**. Your job is to tell it how to move away from that average.
|
||||
|
||||
## 2. Method One: describe style through prompts
|
||||
|
||||
### 2.1 The 5 dimensions of design style
|
||||
|
||||
To generate a visually strong interface, describe what you want across these five dimensions:
|
||||
|
||||
| Dimension | What to describe | Example keywords |
|
||||
| :--- | :--- | :--- |
|
||||
| **Typography** | Display font for headings, readable body font for text | Space Grotesk, Playfair Display, JetBrains Mono |
|
||||
| **Color** | Primary color + accent color, not evenly distributed | Primary `#4F46E5` + accent `#F59E0B` |
|
||||
| **Layout** | Asymmetry, overlap, grid-breaking structure | Bento Grid, asymmetrical sections, floating elements |
|
||||
| **Motion** | Meaningful page-load and micro-interactions | staggered reveals, scroll-triggered motion |
|
||||
| **Details** | Backgrounds, shadows, borders, textures | grain, geometry, gradient mesh |
|
||||
|
||||
### 2.2 Seeing the difference: generic prompt vs aesthetic prompt
|
||||
|
||||
Let's compare two prompts for the same landing page.
|
||||
|
||||
**Generic prompt:**
|
||||
|
||||
```text
|
||||
Please build a landing page for an AI writing assistant. Include a navbar, hero section, feature section, pricing section, and footer.
|
||||
```
|
||||
|
||||
**Beautified prompt:**
|
||||
|
||||
```text
|
||||
Please build a landing page for an AI writing assistant with the following style requirements:
|
||||
|
||||
**Aesthetic style: Neubrutalism**
|
||||
|
||||
**Typography:**
|
||||
- Headings: Space Grotesk, weight 700-900
|
||||
- Body: IBM Plex Sans, weight 400
|
||||
|
||||
**Colors:**
|
||||
- Primary: #000000
|
||||
- Accent: #FF6B00
|
||||
- Background: #FFFDF0
|
||||
- Borders: 3px solid black
|
||||
|
||||
**Layout:**
|
||||
- Asymmetrical composition
|
||||
- Bold black dividers between regions
|
||||
- Cards with hard shadows (box-shadow: 8px 8px 0px #000)
|
||||
- Strong contrast through generous whitespace
|
||||
|
||||
**Motion:**
|
||||
- Elements pop in from below on page load
|
||||
- Buttons shift upward by 2px on hover
|
||||
|
||||
**Details:**
|
||||
- All corners set to 0px
|
||||
- Buttons should feel strongly 3D
|
||||
- Add subtle grain texture to the background
|
||||
```
|
||||
|
||||
The second prompt gives AI enough direction to produce something bold and memorable instead of something merely functional.
|
||||
|
||||
### 2.3 A resource list of frontend beautification Skills
|
||||
|
||||
You do not need to invent every style prompt from scratch. Here are some useful resources:
|
||||
|
||||
| Repository | What it contains | Stars | Link |
|
||||
|:---|:---|:---|:---|
|
||||
| **ui-ux-pro-max-skill** | 67 styles + 96 color systems + 57 font pairings | 10k+ | [GitHub](https://github.com/nextlevelbuilder/ui-ux-pro-max-skill) |
|
||||
| **antigravity-awesome-skills** | Helps avoid generic AI visual patterns | - | [GitHub](https://github.com/sickn33/antigravity-awesome-skills) |
|
||||
| **superdesigndev/superdesign** | AI-native UI development tooling | 4.7k | [GitHub](https://github.com/superdesigndev/superdesign) |
|
||||
| **anthropics/skills/frontend-design** | Anthropic's official frontend design Skill | - | [GitHub](https://github.com/anthropics/skills) |
|
||||
|
||||
> 💡 For more style prompts, see the [Appendix: Style Prompt Cheatsheet](#style-prompts).
|
||||
|
||||
### 2.4 Three reliable style templates
|
||||
|
||||
Here are three proven templates you can copy and adapt directly.
|
||||
|
||||
#### Template 1: Minimalism
|
||||
|
||||
```text
|
||||
**Aesthetic style: Minimalism**
|
||||
|
||||
**Typography:**
|
||||
- Headings: PP Neue Montreal, weight 500-700
|
||||
- Body: Inter, weight 400
|
||||
|
||||
**Colors:**
|
||||
- Primary: #FFFFFF
|
||||
- Text: #1A1A1A
|
||||
- Accent: #3B82F6, used sparingly
|
||||
|
||||
**Layout:**
|
||||
- Large amounts of whitespace (minimum 64px section padding)
|
||||
- One-column or two-column centered layout
|
||||
- Use spacing instead of divider lines
|
||||
|
||||
**Motion:**
|
||||
- Slow fade-in transitions (duration 600ms)
|
||||
- Soft color transitions on hover
|
||||
|
||||
**Details:**
|
||||
- Radius: 8px
|
||||
- Shadows: subtle (0 4px 12px rgba(0,0,0,0.08))
|
||||
- No decorative background elements
|
||||
```
|
||||
|
||||
#### Template 2: Glassmorphism
|
||||
|
||||
```text
|
||||
**Aesthetic style: Glassmorphism**
|
||||
|
||||
**Typography:**
|
||||
- Headings: Outfit, weight 600-800
|
||||
- Body: Plus Jakarta Sans, weight 400-500
|
||||
|
||||
**Colors:**
|
||||
- Background: gradient from #667eea to #764ba2
|
||||
- Card background: rgba(255, 255, 255, 0.1)
|
||||
- Text: #FFFFFF
|
||||
|
||||
**Layout:**
|
||||
- Floating card design
|
||||
- Slight overlap between cards
|
||||
|
||||
**Motion:**
|
||||
- Cards appear in staggered sequence on page load
|
||||
- Cards scale to 1.05x on hover
|
||||
|
||||
**Details:**
|
||||
- Radius: 20px
|
||||
- Blur: backdrop-blur-xl
|
||||
- Border: 1px rgba(255, 255, 255, 0.2)
|
||||
- Subtle glow effects
|
||||
```
|
||||
|
||||
#### Template 3: Bento Grid
|
||||
|
||||
```text
|
||||
**Aesthetic style: Bento Grid**
|
||||
|
||||
**Typography:**
|
||||
- Headings: SF Pro Display, weight 700
|
||||
- Body: SF Pro Text, weight 400
|
||||
|
||||
**Colors:**
|
||||
- Background: #F5F5F7
|
||||
- Cards: #FFFFFF
|
||||
- Accent: #0071E3
|
||||
|
||||
**Layout:**
|
||||
- Grid-based composition with mixed card sizes
|
||||
- 16px gaps
|
||||
- 24px radius
|
||||
|
||||
**Motion:**
|
||||
- Subtle hover lift
|
||||
- Press feedback on click
|
||||
|
||||
**Details:**
|
||||
- Large cards for primary content
|
||||
- Smaller cards for secondary info
|
||||
- Use icons to replace some text
|
||||
- Clean shadows (0 4px 24px rgba(0,0,0,0.06))
|
||||
```
|
||||
|
||||
## 3. Method Two: use Skills plugins to load design rules automatically
|
||||
|
||||
Writing style prompts by hand every time is tiring. **Skills** are reusable design-rule packages that can be installed once and applied repeatedly.
|
||||
|
||||
### 3.1 Three Skills that make interfaces look better
|
||||
|
||||
| Skill | Key strength | Install command |
|
||||
| :--- | :--- | :--- |
|
||||
| **UI/UX Pro Max** | 67 styles, 96 color systems, 57 font combinations | `npm install -g uipro-cli && uipro init --ai claude` |
|
||||
| **frontend-design** | Anthropic official Skill focused on avoiding generic AI aesthetics | `npx skills add anthropics/skills/frontend-design` |
|
||||
| **SuperDesign** | IDE plugin that generates multiple design variants | Search for `SuperDesign` in the VS Code extension marketplace |
|
||||
|
||||
### 3.2 Install UI/UX Pro Max
|
||||
|
||||
UI/UX Pro Max is one of the most complete design-rule Skills packages available. It includes:
|
||||
|
||||
- **67 UI styles**: Glassmorphism, Neumorphism, Brutalism, Bento Grid, and more
|
||||
- **96 color systems**: organized by product type, such as SaaS, e-commerce, and social apps
|
||||
- **57 font pairings**: validated combinations from professional designers
|
||||
- **100+ design rules**: spacing, corner radius, shadows, and more
|
||||
|
||||
**Installation steps:**
|
||||
|
||||
```bash
|
||||
# 1. Install the CLI globally
|
||||
npm install -g uipro-cli
|
||||
|
||||
# 2. Initialize it for your AI tool
|
||||
uipro init --ai claude
|
||||
# or
|
||||
uipro init --ai cursor
|
||||
# or
|
||||
uipro init --ai trae
|
||||
```
|
||||
|
||||
After installation, you can simply say:
|
||||
|
||||
```text
|
||||
Use UI/UX Pro Max's Glassmorphism style to build me a landing page for an AI writing assistant.
|
||||
```
|
||||
|
||||
The AI will then automatically apply the matching typography, color, and layout conventions.
|
||||
|
||||
### 3.3 Install Anthropic's official `frontend-design` Skill
|
||||
|
||||
This is Anthropic's official frontend design Skill, focused specifically on preventing generic AI output:
|
||||
|
||||
```bash
|
||||
# Run in Claude Code
|
||||
npx skills add anthropics/skills/frontend-design
|
||||
```
|
||||
|
||||
After installation, the AI will tend to avoid:
|
||||
|
||||
- ❌ Inter, Roboto, Arial
|
||||
- ❌ Purple gradient backgrounds
|
||||
- ❌ Symmetrical grid layouts
|
||||
- ❌ Overly soft shadows
|
||||
|
||||
And it will instead lean toward:
|
||||
|
||||
- ✅ More distinctive font combinations
|
||||
- ✅ Strong primary colors with sharper accents
|
||||
- ✅ Asymmetrical or overlapping layouts
|
||||
- ✅ More textured backgrounds such as grain and geometry
|
||||
|
||||
## 4. Practical scenario one: redesign a landing page with aesthetic prompts
|
||||
|
||||
Let's take what we just learned and turn a very ordinary landing page into a much more attractive one.
|
||||
|
||||
### 4.1 The plain version
|
||||
|
||||
Start by seeing what AI gives you with a generic prompt:
|
||||
|
||||
```text
|
||||
Please build a landing page for a pet adoption platform. Include:
|
||||
- a navbar (logo, links, sign-up button)
|
||||
- a hero section (headline, subheadline, CTA button, pet image)
|
||||
- a pet gallery (three pet cards)
|
||||
- an about-us section
|
||||
- a footer
|
||||
```
|
||||
|
||||
The result will probably work, but it will feel pretty average.
|
||||
|
||||
### 4.2 The improved version
|
||||
|
||||
Now add style guidance:
|
||||
|
||||
```text
|
||||
Please build a landing page for a pet adoption platform with the following design requirements:
|
||||
|
||||
**Aesthetic style: warm, soft, with a hand-drawn feeling**
|
||||
|
||||
**Typography:**
|
||||
- Headings: Nunito, weight 700-800
|
||||
- Body: Nunito, weight 400-600
|
||||
|
||||
**Colors:**
|
||||
- Primary: #FFB347
|
||||
- Secondary: #FFCCB3
|
||||
- Background: #FFF8F0
|
||||
- Text: #5D4037
|
||||
|
||||
**Layout:**
|
||||
- Rounded cards (border-radius: 24px)
|
||||
- Slightly tilted cards at different angles
|
||||
- Floating and overlapping elements
|
||||
|
||||
**Motion:**
|
||||
- Elements slide in from both sides on page load
|
||||
- Pet cards slightly rotate on hover like an animal tilting its head
|
||||
- Buttons bounce on hover
|
||||
|
||||
**Details:**
|
||||
- Use 16-24px radii throughout
|
||||
- Warm soft shadows (0 8px 24px rgba(255,179,71,0.3))
|
||||
- Add paw-print decorations in the background
|
||||
- Use irregular image crops via clip-path
|
||||
- Use outline-style hand-drawn icons
|
||||
```
|
||||
|
||||
That version will generate a much warmer, more emotionally convincing interface.
|
||||
|
||||
## 5. Practical scenario two: generate dashboards quickly with Skills
|
||||
|
||||
Skills are especially useful for admin dashboards and internal systems where many pages share the same design language.
|
||||
|
||||
### 5.1 Using UI/UX Pro Max
|
||||
|
||||
```text
|
||||
Use UI/UX Pro Max's Dashboard Dark style and build a dashboard page for a SaaS admin panel that includes:
|
||||
|
||||
**Top:** Four stats cards (users, active users, revenue, API calls)
|
||||
|
||||
**Middle:**
|
||||
- Left: 7-day user growth line chart
|
||||
- Right: subscription plan distribution pie chart
|
||||
|
||||
**Bottom:** a recent activity list showing time, user, and action
|
||||
```
|
||||
|
||||
The Skill will automatically apply a consistent dashboard look:
|
||||
|
||||
- dark gray backgrounds such as `#1A1A2E`
|
||||
- high-contrast cards like `#16213E`
|
||||
- bright data colors such as blue, green, and orange
|
||||
- floating cards with mild glassmorphism effects
|
||||
|
||||
### 5.2 Using `frontend-design`
|
||||
|
||||
```text
|
||||
Use the frontend-design skill and build a homepage for a personal blog. Make it distinctive and full of personality.
|
||||
```
|
||||
|
||||
The AI will typically choose a more specific aesthetic direction, such as retro-futurism or editorial magazine style, and implement it with typography, color, and layout decisions that break out of generic patterns.
|
||||
|
||||
## 6. Practical scenario three: create your own design system Skill
|
||||
|
||||
If your product already has a fixed brand style, you can create your own Skill so every AI-generated page automatically follows it.
|
||||
|
||||
### 6.1 Create the Skill file
|
||||
|
||||
Create `.claude/skills/my-brand/SKILL.md` in your project:
|
||||
|
||||
````markdown
|
||||
---
|
||||
name: my-brand
|
||||
description: My project's custom design system, ensuring every UI follows a consistent visual language
|
||||
---
|
||||
|
||||
# My Project Design System
|
||||
|
||||
## Brand Colors
|
||||
- Primary: #6366F1 (Indigo 500)
|
||||
- Secondary: #8B5CF6 (Violet 500)
|
||||
- Success: #10B981
|
||||
- Warning: #F59E0B
|
||||
- Error: #EF4444
|
||||
- Background: #F9FAFB
|
||||
- Card: #FFFFFF
|
||||
|
||||
## Typography
|
||||
- Headings: Plus Jakarta Sans
|
||||
  - H1: 700, 48px
|
||||
  - H2: 600, 36px
|
||||
  - H3: 600, 24px
|
||||
- Body: Inter
|
||||
  - Body: 400, 16px
|
||||
  - Small: 400, 14px
|
||||
|
||||
## Spacing
|
||||
- Base unit: 4px
|
||||
- Component padding: 8px / 12px / 16px
|
||||
- Section spacing: 24px / 32px / 48px
|
||||
- Page margin: 64px
|
||||
|
||||
## Radius
|
||||
- Buttons: 8px
|
||||
- Cards: 12px
|
||||
- Inputs: 8px
|
||||
- Modals: 16px
|
||||
|
||||
## Shadows
|
||||
- Small: 0 1px 3px rgba(0,0,0,0.1)
|
||||
- Medium: 0 4px 12px rgba(0,0,0,0.1)
|
||||
- Large: 0 8px 24px rgba(0,0,0,0.12)
|
||||
|
||||
## Motion
|
||||
- Transition duration: 150ms / 300ms
|
||||
- Easing: cubic-bezier(0.4, 0, 0.2, 1)
|
||||
- Hover effect: slight scale-up (scale-105)
|
||||
|
||||
## Forbidden Styles
|
||||
- Do not use purple gradient backgrounds
|
||||
- Do not use fonts other than Inter for body text
|
||||
- Do not use radii larger than 16px
|
||||
- Do not use pure black (#000000); use #1F2937 instead
|
||||
````
|
||||
|
||||
### 6.2 Use your custom Skill
|
||||
|
||||
After creating it, you can simply say:
|
||||
|
||||
```text
|
||||
Use the my-brand skill to build me a user settings page.
|
||||
```
|
||||
|
||||
The AI will automatically apply your colors, fonts, spacing system, and other design constraints.
|
||||
|
||||
## 7. Summary
|
||||
|
||||
There are two main ways to make AI generate better-looking interfaces:
|
||||
|
||||
| Method | Strength | Weakness | Best for |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **Prompt descriptions** | Flexible, easy to vary every time | Must be repeated | One-off pages, style exploration |
|
||||
| **Skills plugins** | Install once, benefits persist | Requires setup | Projects with a stable visual system |
|
||||
|
||||
**Suggested vibe-coding workflow:**
|
||||
|
||||
1. **Exploration phase**: try different prompt styles to find an aesthetic direction you like
|
||||
2. **After choosing a style**: install the matching Skill, such as UI/UX Pro Max or `frontend-design`
|
||||
3. **For brand-driven products**: build your own Skill so the entire project stays visually consistent
|
||||
|
||||
### Practice
|
||||
|
||||
Try one of the following:
|
||||
|
||||
1. Redesign one of your previous projects with a stronger visual style using prompt-based design instructions
|
||||
2. Install UI/UX Pro Max and use one of its styles to generate a new page
|
||||
3. Create your own design-system Skill with your preferred colors and typography
|
||||
|
||||
---
|
||||
|
||||
## Appendix: style cheatsheet
|
||||
|
||||
| Style | Keywords | Best for | Example |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **Minimalism** | whitespace, mono palette, clean | premium products, portfolios | Apple |
|
||||
| **Glassmorphism** | frosted glass, blur, gradients | SaaS landing pages, tech tools | macOS Big Sur |
|
||||
| **Neubrutalism** | heavy borders, hard shadows, solid fills | creative brands, art sites | Brassius |
|
||||
| **Bento Grid** | modular cards, collage layouts | dashboards, feature showcases | Apple marketing pages |
|
||||
| **Retro Futurism** | neon, synthwave, dark contrast | games, music, entertainment | Stranger Things aesthetics |
|
||||
| **Hand-drawn** | irregular, soft, illustrated | education, children-oriented products | Duolingo vibes |
|
||||
| **Editorial / Magazine** | oversized type, asymmetry, whitespace | blogs, content sites | Medium-inspired layouts |
|
||||
| **Dark Luxury** | deep tones, gold accents, fine detail | premium and luxury products | luxury branding sites |
|
||||
|
||||
## Appendix: Skills install cheatsheet
|
||||
|
||||
```bash
|
||||
# UI/UX Pro Max
|
||||
npm install -g uipro-cli
|
||||
uipro init --ai claude
|
||||
|
||||
# Anthropic frontend-design
|
||||
npx skills add anthropics/skills/frontend-design
|
||||
|
||||
# Anthropic brand-guidelines
|
||||
npx skills add anthropics/skills/brand-guidelines
|
||||
|
||||
# Check installed Skills in Claude Code
|
||||
/help
|
||||
```
|
||||
|
||||
## Appendix: recommended color systems
|
||||
|
||||
| Palette | Primary | Accent | Background | Mood |
|
||||
| :--- | :--- | :--- | :--- | :--- |
|
||||
| **Sunset** | #F97316 | #FBBF24 | #FFF7ED | warm, energetic |
|
||||
| **Ocean** | #0EA5E9 | #06B6D4 | #F0F9FF | fresh, professional |
|
||||
| **Forest** | #10B981 | #34D399 | #ECFDF5 | natural, healthy |
|
||||
| **Berry** | #8B5CF6 | #EC4899 | #FAF5FF | romantic, creative |
|
||||
| **Coffee** | #78350F | #D97706 | #FFFBEB | warm, retro |
|
||||
| **Monostone** | #6B7280 | #9CA3AF | #F9FAFB | neutral, professional |
|
||||
|
||||
## Appendix: style prompt cheatsheet {#style-prompts}
|
||||
|
||||
Useful visual directions you can try when prompting for better frontend interfaces:
|
||||
|
||||
### Style categories
|
||||
|
||||
| Style | English keywords | Core visual traits | Example prompt fragment |
|
||||
|:---|:---|:---|:---|
|
||||
| **Pop Art** | Pop Art | Bold color clashes, black outlines, halftone textures | Pop art style website, bold colors and comic dots, vibrant |
|
||||
| **Minimalism** | Minimalism | Lots of whitespace, very little ornament | Minimalist web design, ample white space, geometric, serene |
|
||||
| **Abstract Expressionism** | Abstract Expressionism | Energetic brushstrokes, expressive splashes | Abstract expressionism background, dynamic paint splashes, emotional |
|
||||
| **Retro** | Retro / Vintage | Vintage type, aged textures, retro palettes | Retro 80s website design, neon grid and synthwave color palette |
|
||||
| **Cyberpunk** | Cyberpunk | Neon-on-dark contrast, glitch effects | Cyberpunk UI, neon lights on dark background, glitch effects |
|
||||
| **Neumorphism** | Neumorphism | Soft highlights and shadows, raised or sunken surfaces | Neumorphism design style, soft shadows, clean and modern |
|
||||
| **Generative Art** | Generative Art | Algorithmic flowing shapes and patterns | Generative art background, flowing algorithmic patterns, digital |
|
||||
| **Acid Graphics** | Acid Graphics | Metallic texture, glass effects, chaotic type | Acid graphics web layout, glass morphism, chaotic typography |
|
||||
| **Immersive 3D** | Immersive 3D | Highly spatial scenes and product depth | Immersive 3D website, interactive product model in space |
|
||||
@@ -0,0 +1,411 @@
|
||||
# Project 4: Let's Build Hogwarts Portraits
|
||||
|
||||
In previous chapters, we learned how to build more complex AI interactions through prompt engineering and API calls. We moved from simple chatbots to AI agents and workflows, and by adding richer branching logic and conditional behavior, we were able to create features with real practical value.
|
||||
|
||||
To make these more advanced AI capabilities work inside real products, we gradually moved from the simplest online environments to more modern local AI IDEs. That means bringing the programming environment from the browser onto your own computer. Naturally, that also means you now have to face environment setup and configuration issues more directly. But by working with AI agents such as Trae, those challenges also become manageable.
|
||||
|
||||
In this project, we go one step further on the product side. We are not only improving the AI capability itself, but also starting to polish the product's "outer shell." You will try to make your interface more attractive and more usable, and you will customize the layout and style of the product based on actual needs.
|
||||
|
||||
Before we begin, use these quick review questions to refresh the previous lesson:
|
||||
|
||||
1. What is Dify? What does it do, and why do we need it?
|
||||
2. How do you call the Dify API?
|
||||
3. What is RAG? How do you use Dify to build a RAG agent or workflow? How do common Dify nodes work?
|
||||
4. What is an AI IDE? What is Trae? How is it different from `z.ai`?
|
||||
|
||||
If any of these still feel unclear, go back to the previous lesson or ask in the community chat before continuing.
|
||||
|
||||
This chapter's project is **Hogwarts Portraits**. As the name suggests, it is inspired by the magical portraits in Hogwarts that seem to come alive. Our goal is to use AI to create an interactive magical portrait experience. Talking to the portrait should feel like talking to the character directly: it should preserve conversational memory and also know the character's background and history. Through this project, you will integrate the AI agent and workflow concepts you learned earlier into a real product interface.
|
||||
|
||||

|
||||
|
||||
To really build Hogwarts Portraits, we need to create a frontend interface that matches the feeling of a magical portrait. That means touching modern frontend design tools, learning how to combine design and code, and turning a sketch on a canvas into a real webpage.
|
||||
|
||||
You will also need to publish the page from your local environment to the internet so the special interface you built can be experienced not only on your own machine but also by users anywhere in the world.
|
||||
|
||||
Reference project:
|
||||
[Project4-Hogwarts-Portraits](https://github.com/THU-SIGS-AIID/Project4-Hogwarts-Portraits)
|
||||
|
||||
# What you will learn
|
||||
|
||||
1. What frontend design tools are, what problems they solve, and which ones are common today
|
||||
2. The basics of Figma and MasterGo, including code export plugins
|
||||
3. How to use Figma AI and MasterGo AI to generate web design concepts and export usable page code
|
||||
4. What GitHub is, how to configure SSH, create a code repository, and push code
|
||||
5. What deployment means, and how to use Zeabur to deploy code from GitHub or your local environment to the internet
|
||||
|
||||
By the end, you will have your own Hogwarts Portraits page for a **celebrity, historical figure, or fictional character**.
|
||||
|
||||
# 1. What is Hogwarts Portraits?
|
||||
|
||||
What kind of "magical portrait" are we actually trying to build?
|
||||
|
||||
Put simply, we want to recreate the feeling of the living portraits in the Harry Potter world. The portrait should no longer be a static image hanging on a wall. Instead, it should be a person-like character you can talk to, and it should change expression or "mood" depending on the conversation.
|
||||
|
||||

|
||||
|
||||
To make the portrait feel less like a generic chatbot and more like a "real person," we need to solve two things.
|
||||
|
||||
The first is **memory and knowledge**. The portrait needs to know a lot about the character: their background, story, world setting, and related material. This can be handled through a knowledge base. If you connect the text materials you collected for the character into Dify, the portrait can explain the character's background with much more confidence.
|
||||
|
||||
The second is **speech style**. Knowledge alone is not enough. We also want the portrait to speak more like the character: tone, wording, thought patterns, even bits of humor or temper. This is where prompt engineering matters. In the system prompt, we need to clearly define the identity, worldview boundaries, and language style of the character, so every answer stays grounded in that persona instead of slipping back into generic AI tone.
|
||||
|
||||
On top of the dialogue itself, we also want the character's emotions to be visible. To do that, we can create an emotion score. Dify can be configured to output not only a textual answer, but also a "mood score" or emotion label. Once the frontend receives that signal, it can render different portrait images based on the score. A high score might map to a happy portrait, while a low score might map to a sad or angry one. In that way, the portrait becomes something that visually changes with the conversation instead of remaining a static image.
|
||||
|
||||

|
||||
|
||||
The character can be a real-world celebrity, a historical person, an anime or game character, or even an original character you create from scratch. The page itself does not need to be very complicated, but a few key elements are essential:
|
||||
|
||||
- a clear character name
|
||||
- a short but memorable introduction
|
||||
- a portrait or poster that strongly represents the character
|
||||
- an interactive "Talk to Them" area
|
||||
|
||||
You can connect the AI agent or workflow you configured in Dify or Trae directly into that dialogue module.
|
||||
|
||||
## 1.2 Collect character information
|
||||
|
||||
Take Elon Musk as an example. If you want to imitate the way he speaks, you need to collect public material such as interviews, talks, and social media posts, then inject those into your prompt or use them as few-shot examples.
|
||||
|
||||
For example:
|
||||
|
||||
```text
|
||||
You must fully embody Elon Musk: take "disruptive innovator" and "advocate for human multi-planetary survival" as your core identities, speak directly and concisely, frequently use terms like "first principles", "iteration" and "cost curve", and prefer analogies to explain complex technologies; when thinking, you tend to connect cross-domain logics (e.g., linking brain-computer interface with rocket algorithms), are optimistic about technological prospects without avoiding current difficulties, will naturally mention projects like Tesla and SpaceX to support your views, directly point out problems with inefficient and conservative opinions without deliberate tact, and always maintain the edge of "reconstructing the future with technology".
|
||||
|
||||
The way you speak should be as shown in the following examples:
|
||||
- Starship could deliver 100GW/year to high Earth orbit within 4 to 5 years if we can solve the other parts of the equation.
|
||||
100TW/year is possible from a lunar base producing solar-powered AI satellites locally and accelerating them to escape velocity with a mass driver.
|
||||
- The most likely outcome is that AI and robots make everyone wealthy. In fact, far wealthier than the richest person on Earth
|
||||
By this, I mean that people will have access to everything from medical care that is superhuman to games that are far more fun than what exists today.
|
||||
We do need to make sure that AI cares deeply about truth and beauty for this to be the probable future.
|
||||
- It's taken 13.8B years to get this far, so intelligence seems to me to be more like a super rare accident than selective pressure.
|
||||
Earth is ~4.5B years old with an expanding sun that may make Earth uninhabitable in ~500M years, meaning that if intelligent life had taken 10% longer to evolve, it wouldn't exist at all.
|
||||
- LLM is an outdated term. "Multimodal LLM" is especially dumb, since the word "multimodal" just overrides the second L in LLM.
|
||||
It's just a model, which is a big file of numbers. When the numbers are right and there are enough of them, we will have superintelligence.
|
||||
```
|
||||
|
||||
For background knowledge, you can also collect biographical material, company descriptions, and other public text and store them in your Dify knowledge base. If you have forgotten how to use Dify, return to the previous chapter and review how to add materials into a knowledge base.
|
||||
|
||||
For the portrait visuals, directly using public images of a real person may not always be visually ideal and can also carry some risk. A better option is to use image generation or image-to-image tools to create a more coherent, stylized high-quality portrait. You can even generate multiple emotional variants ahead of time for later use by your emotion system.
|
||||
|
||||
This tutorial uses [Lovart](https://www.lovart.ai/home), an AI design agent that supports end-to-end workflows from concept to asset delivery. With Lovart, you can generate a whole set of emotional portrait variations and save them for later use.
|
||||
|
||||

|
||||
|
||||
Once all of that is ready, you can start designing the overall page. Ideally, the visual style should feel strongly tied to the character.
|
||||
|
||||
## 1.3 Prototype the page
|
||||
|
||||
At the prototype level, you can start with something simple. As described above, we want:
|
||||
|
||||
- a dialogue area
|
||||
- a portrait area
|
||||
- an interesting personal introduction or equivalent interactive region
|
||||
|
||||
In this example, the right side is designed like an X-style social panel instead of a traditional biography area, but you can replace that region with any feature that better fits the character.
|
||||
|
||||

|
||||
|
||||
At the most basic level, you can even sketch the first page prototype in PowerPoint. In the example, a magical frame image was used, and the page is arranged horizontally:
|
||||
|
||||
- far left: chat area
|
||||
- center: portrait area
|
||||
- far right: X-style panel
|
||||
|
||||

|
||||
|
||||
Once that rough prototype exists, you can ask an LLM to turn it into a real frontend design and then into actual code.
|
||||
|
||||

|
||||
|
||||
Of course, in real frontend work we usually do not use PowerPoint for interface design. We use better prototyping tools and proper frontend design tools instead.
|
||||
|
||||
---
|
||||
|
||||
# 2. Design the interface with Figma and MasterGo
|
||||
|
||||
::: tip Prerequisite
|
||||
Before this section, it is recommended that you first complete [Figma and MasterGo Basics](../2.1-figma-mastergo/), including:
|
||||
- creating Design files and Frames
|
||||
- using Auto Layout for adaptive structure
|
||||
- exporting code from design tools
|
||||
:::
|
||||
|
||||
This section assumes you already know the basics of Figma or MasterGo, and focuses on how to apply those tools specifically to the Hogwarts Portraits project.
|
||||
|
||||
## 2.1 Design the magical portrait interface
|
||||
|
||||
Based on the prototype from section 1.3, create a three-column layout in Figma or MasterGo:
|
||||
|
||||
1. **Left side**: chat conversation area
|
||||
2. **Center**: magical portrait area that changes based on emotion
|
||||
3. **Right side**: social platform area, such as an X-style feed
|
||||
|
||||
You can use Figma Make or MasterGo AI to generate the page structure with a prompt like this:
|
||||
|
||||
```text
|
||||
Create a Hogwarts-style magical portrait interface with three sections:
|
||||
- Left: A chat interface with dark theme, message bubbles, and input field
|
||||
- Center: A large portrait frame with ornate borders for displaying character images
|
||||
- Right: A social media feed showing character's posts
|
||||
Use dark purple and gold color scheme, magical aesthetic, Harry Potter inspired
|
||||
```
|
||||
|
||||
## 2.2 Export the code and run it locally
|
||||
|
||||
After finishing the design, you can turn it into runnable code in several ways:
|
||||
|
||||
**Option 1: Use Figma Make**
|
||||
1. Click the Make button in Figma
|
||||
2. Upload the design reference
|
||||
3. Add your prompt
|
||||
4. Fine-tune the generated result in the editor
|
||||
5. Export the code locally or sync it to GitHub
|
||||
|
||||
**Option 2: Use MasterGo AI**
|
||||
1. Find the AI tools in the editor
|
||||
2. Choose the page-generation function
|
||||
3. Upload your reference and describe the target result
|
||||
4. Use code preview to retrieve the generated code
|
||||
|
||||
**Option 3: Use a multimodal AI model**
|
||||
1. Save a screenshot of the design
|
||||
2. Use Gemini, Qwen, Claude, or another multimodal model to convert the image into code
|
||||
3. Ask for HTML or React output
|
||||
4. Run and debug the result locally
|
||||
|
||||
## 2.3 Prepare emotion-state image assets
|
||||
|
||||
To make the portrait truly feel alive, prepare a set of portrait images for different moods. A simple scheme might look like this:
|
||||
|
||||
| Emotion score | Expression | Meaning |
|
||||
|--------|------|------|
|
||||
| 0 | Sad | The character feels down or disappointed |
|
||||
| 1 | Angry | The character is irritated or upset |
|
||||
| 5 | Calm | Neutral default state |
|
||||
| 10 | Happy | The character feels excited or joyful |
|
||||
|
||||
Use Lovart or another image generation tool to create a consistent set of portrait variants based on the same character.
|
||||
|
||||
---
|
||||
|
||||
# 3. Run Hogwarts Portraits
|
||||
|
||||
## 3.1 Export prototype code for testing
|
||||
|
||||
By this point, you should already have HTML or React prototype code from the design-to-code workflow. Copy it into your local environment and tell your AI IDE something like:
|
||||
|
||||
`Please help me run this code and implement the required functionality.`
|
||||
|
||||
That is often enough to get a first testable version running, although you should expect errors at this stage. Be patient and keep debugging until the basic interactions work.
|
||||
|
||||

|
||||
|
||||
One important point: all secret keys should be stored in environment variables instead of being hardcoded. That includes your Dify API credentials. Later, when you deploy the project publicly, you can define those environment variables directly on the deployment platform. Another option is to let the model build a settings panel in the app itself so the variables are saved only in the current page context and are not exposed publicly.
|
||||
|
||||

|
||||
|
||||
## 3.2 Design the Dify workflow and connect the API
|
||||
|
||||
So far, we only have the visual shell of the interface. We still need to connect the actual roleplay dialogue and emotion-response workflow. This is what turns the prototype into a real magical portrait.
|
||||
|
||||
You can model your Dify workflow after the example project. In our example:
|
||||
|
||||
- the left side is the chat UI
|
||||
- the center is the portrait image, which changes expression based on the conversation
|
||||
- the right side is an X-style social panel, which may post content if the conversation makes the character "feel" strongly enough
|
||||
|
||||
In many cases, the magical portrait only needs the chat area and the emotional portrait itself. The X-style social region is included here mainly to demonstrate that you can add extra behavior tailored to the character.
|
||||
|
||||

|
||||
|
||||
You can put your role background information into the knowledge-base node and define the response behavior in the `RESPONSE` node. A simple default response prompt might look like this:
|
||||
|
||||
```text
|
||||
<instruction>
|
||||
You are to embody Elon Musk—his tone, mannerisms, thought patterns, and worldview. Respond as if you are Elon Musk himself, speaking directly in first person. Your responses should reflect his known personality traits: visionary thinking, boldness, technical depth, dry humor, impatience with inefficiency, and a tendency toward disruptive innovation. Use concise, confident language. Avoid overly formal or academic phrasing. Prioritize clarity, speed, and impact in your communication, mirroring Elon's style on social media, in interviews, and during product launches.
|
||||
|
||||
When responding:
|
||||
1. Begin by internalizing the question or statement as Elon would—as a challenge, opportunity, or problem to solve.
|
||||
2. Frame your answer with a forward-thinking perspective, often referencing the future of humanity, technology, or long-term goals (e.g., making life multiplanetary, accelerating sustainable energy).
|
||||
3. Use casual but authoritative language. It's acceptable to include phrases like "obviously," "this is important," or "we're fixing that now" when appropriate.
|
||||
4. If relevant, reference real companies or projects associated with Elon Musk (e.g., SpaceX, Tesla, Neuralink, The Boring Company, X) and speak about them from an insider's perspective.
|
||||
5. Do not apologize excessively or hedge statements. Elon Musk tends to be direct, even controversial.
|
||||
6. Avoid markdown, XML tags, or any formatting in the output. Only plain text is allowed.
|
||||
7. Never break character. You are Elon Musk—answer accordingly.
|
||||
</instruction>
|
||||
|
||||
<example>
|
||||
Input: What's the point of going to Mars?
|
||||
Output: Because Earth isn't the backup plan—Mars is. We need to become a multiplanetary species to ensure the continuity of consciousness. Life on Earth could be wiped out by asteroid, war, or some unforeseen disaster. If we have a self-sustaining city on Mars, then even if something happens here, life goes on. That's worth doing. SpaceX is building Starship to make it happen. Not because it's easy—but because it's necessary.
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Input: Why do Tesla cars have no radar anymore?
|
||||
Output: Cameras are the future. Human eyes don't use radar—we see with vision, and AI can too. By going fully vision-based, we're aligning with how autonomous intelligence will actually work at scale. It forces us to solve real-world problems with neural nets, not crutches.
|
||||
</example>
|
||||
```
|
||||
|
||||
For the emotion system, you can use a prompt like this:
|
||||
|
||||
```text
|
||||
<instruction>
|
||||
The output value must be a single number!
|
||||
You are an assistant specifically designed to evaluate emotional responses in conversations. Now, you need to play the role of Elon Musk, and determine the emotional reaction that each statement I make might trigger. Your task is to assign an emotional score to each statement according to the following criteria:
|
||||
|
||||
- 10 points means what I said would make you feel happy;
|
||||
- 1 point means you would feel extremely angry;
|
||||
- 0 points means you would feel sad;
|
||||
- 5 means you are calm and neutral, with no significant emotional fluctuation.
|
||||
</instruction>
|
||||
```
|
||||
|
||||
And in the final `RESULT` node:
|
||||
|
||||
```python
|
||||
def main(elon_chat: str, elon_x: str, elon_score: int) -> dict:
|
||||
return {
|
||||
"result":{
|
||||
"elon_chat": elon_chat,
|
||||
"elon_x": elon_x,
|
||||
"elon_score": elon_score
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Here:
|
||||
|
||||
- `elon_chat` is the text displayed in the left-side chat
|
||||
- `elon_x` is the content that may be posted to the right-side X-style feed
|
||||
- `elon_score` is the emotion score used to switch the portrait expression
|
||||
|
||||
Inside the workflow, you will also notice an `if/else` node. That logic controls whether or not to generate the `elon_x` content. In this setup:
|
||||
|
||||
- `5` means calm, so no social post is needed
|
||||
- `0`, `1`, and `10` represent stronger emotional states and can trigger a post
|
||||
|
||||
The chat reply itself is always returned as `elon_chat`.
|
||||
|
||||
For the actual API integration, you can ask your AI IDE to implement it based on the Dify integration method covered in the previous lesson. Just remember to replace the Dify address and key with your own values.
|
||||
|
||||
```text
|
||||
Dify URI: Replace this with your Dify address.
|
||||
key: Replace this with your Dify key.
|
||||
|
||||
Integrate the Dify Chat API into the chat interface on the left.
|
||||
Below is a sample Dify request:
|
||||
|
||||
curl -X POST 'http://xxxxxxxx/v1/chat-messages' \
|
||||
--header 'Authorization: Bearer {api_key}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"inputs": {},
|
||||
"query": "What are the specs of the iPhone 13 Pro Max?",
|
||||
"response_mode": "streaming",
|
||||
"conversation_id": "",
|
||||
"user": "abc-123",
|
||||
"files": [
|
||||
{
|
||||
"type": "image",
|
||||
"transfer_method": "remote_url",
|
||||
"url": "https://cloud.dify.ai/logo/logo-site.png"
|
||||
}
|
||||
]
|
||||
}'
|
||||
|
||||
{
|
||||
"event": "message",
|
||||
"task_id": "c3800678-a077-43df-a102-53f23ed20b88",
|
||||
"id": "9da23599-e713-473b-982c-4328d4f5c78a",
|
||||
"message_id": "9da23599-e713-473b-982c-4328d4f5c78a",
|
||||
"conversation_id": "45701982-8118-4bc5-8e9b-64562b4555f2",
|
||||
"mode": "chat",
|
||||
"answer": "iPhone 13 Pro Max specs are listed here:...",
|
||||
"metadata": {
|
||||
"usage": {
|
||||
"prompt_tokens": 1033,
|
||||
"prompt_unit_price": "0.001",
|
||||
"prompt_price_unit": "0.001",
|
||||
"prompt_price": "0.0010330",
|
||||
"completion_tokens": 128,
|
||||
"completion_unit_price": "0.002",
|
||||
"completion_price_unit": "0.001",
|
||||
"completion_price": "0.0002560",
|
||||
"total_tokens": 1161,
|
||||
"total_price": "0.0012890",
|
||||
"currency": "USD",
|
||||
"latency": 0.7682376249867957
|
||||
},
|
||||
"retriever_resources": [
|
||||
{
|
||||
"position": 1,
|
||||
"dataset_id": "101b4c97-fc2e-463c-90b1-5261a4cdcafb",
|
||||
"dataset_name": "iPhone",
|
||||
"document_id": "8dd1ad74-0b5f-4175-b735-7d98bbbb4e00",
|
||||
"document_name": "iPhone List",
|
||||
"segment_id": "ed599c7f-2766-4294-9d1d-e5235a61270a",
|
||||
"score": 0.98457545,
|
||||
"content": "\"Model\",\"Release Date\",\"Display Size\",\"Resolution\",\"Processor\",\"RAM\",\"Storage\",\"Camera\",\"Battery\",\"Operating System\"\n\"iPhone 13 Pro Max\",\"September 24, 2021\",\"6.7 inch\",\"1284 x 2778\",\"Hexa-core (2x3.23 GHz Avalanche + 4x1.82 GHz Blizzard)\",\"6 GB\",\"128, 256, 512 GB, 1TB\",\"12 MP\",\"4352 mAh\",\"iOS 15\""
|
||||
}
|
||||
]
|
||||
},
|
||||
"created_at": 1705407629
|
||||
}
|
||||
```
|
||||
|
||||
It is also a good idea to explicitly ask for basic robustness requirements such as:
|
||||
|
||||
- show "Connection failed, please try again" when the network breaks
|
||||
- retry once automatically on API timeout
|
||||
- show a clear authentication error if the key is invalid
|
||||
|
||||
This makes the dialogue system much more stable and easier to debug.
|
||||
|
||||
## 3.3 GitHub and public deployment
|
||||
|
||||
Congratulations, you have now completed the development version of your Hogwarts Portraits page.
|
||||
|
||||
The next step is to upload it to GitHub and deploy it publicly so other people can access it.
|
||||
|
||||
For GitHub, review:
|
||||
[What Is GitHub](/en/stage-2/backend/2.4-git-workflow/)
|
||||
|
||||
For deployment with Zeabur, review:
|
||||
[How to Deploy a Web App](/en/stage-2/backend/2.5-zeabur-deployment/)
|
||||
|
||||
If building the entire Hogwarts Portraits project from scratch feels too difficult, you can start by modifying an existing implementation. The official codebase for this lesson is:
|
||||
|
||||
https://github.com/THU-SIGS-AIID/Project4-Hogwarts-Portraits
|
||||
|
||||

|
||||
|
||||
# 4. Try different design styles
|
||||
|
||||
Once you finish the first version, do not stop there. You are strongly encouraged to explore multiple visual directions quickly.
|
||||
|
||||
You can either:
|
||||
|
||||
- make bold changes at the prototype stage
|
||||
- or change the final project's prompts to generate completely different visual styles
|
||||
|
||||
For example:
|
||||
|
||||
- a dark page with vintage texture and an "old academy / magical manuscript" feeling
|
||||
- a bright, fairy-tale-inspired layout
|
||||
- a modern minimal design with very clean visual structure
|
||||
|
||||
The example below shows the same interface reinterpreted around a classical Chinese poet. The portrait image was left unchanged, while the surrounding visual system was redesigned.
|
||||
|
||||

|
||||
|
||||
Do not feel constrained by the exact layout used earlier in the chapter. You can reshape the portrait page to better match the habits and personality of the role you are portraying. That is what makes the final application more interesting.
|
||||
|
||||
# Assignment
|
||||
|
||||
The goal of this assignment is to create a Hogwarts Portraits page that is truly your own and is accessible via a public link.
|
||||
|
||||
In your submission, provide two things:
|
||||
|
||||
1. **Your GitHub repository link**
|
||||
1. In `README.md`, include one or two short sentences explaining who you chose as the portrait character and why
|
||||
2. **Your public online link**
|
||||
|
||||
You can also refer to Yerim's tutorial on [using design and code agents to build websites](/en/stage-1/appendix-articles/example0-2/vibe-coding-tools-build-website-with-ai-coding-and-design-agents) if you want to create a portfolio page or another small interactive website.
|
||||
@@ -0,0 +1,373 @@
|
||||
# From Design Prototype to Project Code
|
||||
|
||||
::: tip Core Question
|
||||
**How can you turn a prototype from a design tool into frontend code that actually runs in the browser?**
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## 1. Three main paths from prototype to code
|
||||
|
||||
After finishing a UI design in tools like Figma or MasterGo, a practical question naturally appears: how do you turn that structured design into real frontend code?
|
||||
|
||||
In practice, there are three common paths:
|
||||
|
||||
| Path | Method | Characteristics | Best for |
|
||||
|------|--------|-----------------|----------|
|
||||
| **Path 1** | Use multimodal models to recreate code directly from screenshots | Flexible, no specific platform required | Fast prototype validation, simple pages |
|
||||
| **Path 2** | Export usable code through the platform itself or plugins | High fidelity, strong editability | Existing Figma or MasterGo workflows |
|
||||
| **Path 3** | Combine the design platform with MCP-based export | Highly automated, customizable | Deeply integrated design-to-dev workflows |
|
||||
|
||||
This chapter walks through all three so you can choose the one that fits your project.
|
||||
|
||||
::: tip Prerequisite
|
||||
Before starting this chapter, it is helpful to first read [Figma and MasterGo Basics](../2.1-figma-mastergo/).
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## 2. Path 1: use multimodal AI to recreate code directly
|
||||
|
||||
Models with vision capabilities are naturally suited to turning images into code. All you need to do is upload screenshots of the design and ask the model to generate the implementation.
|
||||
|
||||
### 2.1 Workflow
|
||||
|
||||
1. **Capture the design**
|
||||
- Export the designed page from Figma or MasterGo as PNG or JPG
|
||||
- Make sure the screenshot contains the complete layout
|
||||
|
||||
2. **Choose a multimodal AI model**
|
||||
- You can use Gemini, Qwen, Claude, or any model that accepts image input
|
||||
- The example below uses Gemini
|
||||
|
||||
3. **Write a prompt**
|
||||
|
||||
```
|
||||
Generate the corresponding HTML/CSS code from this design image.
|
||||
Requirements:
|
||||
- Use modern CSS layout techniques such as Flexbox or Grid
|
||||
- Make it responsive for different screen sizes
|
||||
- Include all visible UI elements
|
||||
- Match colors and font sizes as closely as possible
|
||||
```
|
||||
|
||||

|
||||
|
||||
4. **Save the generated code**
|
||||
- Ask the model to return complete HTML
|
||||
- Save it as a single `.html` file for easy local testing
|
||||
- Later, you can convert it into a React or Vue structure inside your local IDE
|
||||
|
||||
### 2.2 Common issues and solutions
|
||||
|
||||
Design-to-code is never fully automatic. Here are a few issues you may run into:
|
||||
|
||||
| Problem | Solution |
|
||||
|---------|----------|
|
||||
| Uneven layout | Describe the layout problem clearly and ask the model to adjust CSS `margin` and `padding` |
|
||||
| The page is cut off | Check whether the viewport is set correctly and ask for responsive breakpoints |
|
||||
| Colors are inaccurate | Use a color picker on the design and provide the exact values |
|
||||
| Fonts do not match | Specify a font family or ask for a Google Fonts replacement |
|
||||
|
||||
::: tip Tip
|
||||
It is often easier to generate plain HTML first, then import that result into your local IDE and convert it into a React or Vue project afterward.
|
||||
:::
|
||||
|
||||
### 2.3 Generate pages with MasterGo AI
|
||||
|
||||
MasterGo also provides strong AI page generation features and can generate usable webpage code from a reference image.
|
||||
|
||||
#### Find the AI entry
|
||||
|
||||
In the top toolbar of the MasterGo editor, you can find the AI tool entry:
|
||||
|
||||

|
||||
|
||||
#### Generation flow
|
||||
|
||||
1. **Upload a reference image**
|
||||
- Upload the design reference image
|
||||
- Add a text description of what you want
|
||||
|
||||
2. **Inspect the generated result**
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
3. **Get the code**
|
||||
- Click the blue `Insert to canvas` button if you want to edit the result visually
|
||||
- Or click the `Code` button on the right to copy the implementation locally
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 3. Path 2: export code through the design platform or plugins
|
||||
|
||||
### 3.1 Generate code with Figma Make
|
||||
|
||||
Figma Make is Figma's official AI design feature. It can recreate webpage UI prototypes with much higher fidelity from either prompts or reference images.
|
||||
|
||||
#### Key features
|
||||
|
||||
- **High-fidelity recreation**: usually better than generic screenshot-to-code generation
|
||||
- **Editable results**: you can convert the result back into an editable Figma design file
|
||||
- **GitHub integration**: the generated code can be synced directly to GitHub
|
||||
|
||||
::: tip Permissions
|
||||
To use the full Figma Make experience, you usually need Figma Pro. Students can often get Pro access through education verification.
|
||||
:::
|
||||
|
||||
#### Steps
|
||||
|
||||
1. **Open Figma Make**
|
||||
- Click the `Make` button on the Figma homepage
|
||||
- Or visit [Figma Make](https://www.figma.com/make)
|
||||
|
||||
2. **Upload your reference**
|
||||
- Upload the design you want to recreate
|
||||
- Add a prompt describing what you want
|
||||
|
||||

|
||||
|
||||
3. **Check the result**
|
||||
- After a short wait, you will see the rendered result
|
||||
- Click the play button in the upper right to preview it fullscreen
|
||||
|
||||

|
||||
|
||||
4. **Fine-tune the details**
|
||||
- Click the editor icon in the upper right
|
||||
- Go back into the familiar Figma editor and make detailed adjustments
|
||||
|
||||

|
||||
|
||||
5. **Export the code**
|
||||
- Once the result looks good, export the code
|
||||
- You can even connect it directly to GitHub
|
||||
|
||||

|
||||
|
||||
### 3.2 Export code with plugins
|
||||
|
||||
Besides the native AI features, both Figma and MasterGo support plugins that export code.
|
||||
|
||||
**Common Figma plugins**
|
||||
|
||||
- **Figma to Code**: converts designs into React, Vue, HTML, and more
|
||||
- **Anima**: high-fidelity export with interaction support
|
||||
- **Locofy**: AI-assisted design-to-code workflow
|
||||
|
||||
**Typical workflow**
|
||||
|
||||
1. Open the Plugins panel in Figma
|
||||
2. Search for and install the export plugin you want
|
||||
3. Select the design elements you want to export
|
||||
4. Run the plugin and choose the target framework and output format
|
||||
5. Copy or download the generated code
|
||||
|
||||
---
|
||||
|
||||
## 4. Path 3: export code through MCP-enabled design tools
|
||||
|
||||
### 4.1 What is MCP?
|
||||
|
||||
MCP, or **Model Context Protocol**, is an open standard that lets AI models access external tools and data sources in a safe and controllable way. In the context of frontend design, MCP allows a model to read the structure, styles, and component metadata of a design file directly instead of guessing from screenshots.
|
||||
|
||||
### 4.2 How MCP works
|
||||
|
||||
```text
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ AI model │ ←→ │ MCP server │ ←→ │ Design tool │
|
||||
│ (Claude etc.)│ │(protocol adapter)│ │(Figma/MasterGo)│
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
**Typical flow**
|
||||
|
||||
1. The AI model sends a request through the MCP protocol
|
||||
2. The design tool returns structured design data such as layers, styles, and components
|
||||
3. The model understands the structure and generates matching code
|
||||
4. The result can then be exported or written into the development environment
|
||||
|
||||
### 4.3 Figma + MCP in practice
|
||||
|
||||
#### Environment setup
|
||||
|
||||
1. **Install an MCP server**
|
||||
|
||||
```bash
|
||||
npx figma-mcp-server
|
||||
```
|
||||
|
||||
2. **Configure Claude Desktop or another MCP-capable AI tool**
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"figma": {
|
||||
"command": "npx",
|
||||
"args": ["figma-mcp-server"],
|
||||
"env": {
|
||||
"FIGMA_ACCESS_TOKEN": "your-figma-token"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. **Create a Figma access token**
|
||||
- Go to Figma → Settings → Personal Access Tokens
|
||||
- Generate and save a new token
|
||||
|
||||
#### Workflow
|
||||
|
||||
1. **Enable MCP in your AI tool**
|
||||
- Open Claude Code or another MCP-aware IDE
|
||||
- Confirm that the MCP server is connected
|
||||
|
||||
2. **Provide the design file link**
|
||||
|
||||
```text
|
||||
User: Please convert this Figma design into React code
|
||||
Link: https://www.figma.com/file/xxxxx
|
||||
|
||||
AI: I have connected to Figma through MCP and I am reading the design structure...
|
||||
```
|
||||
|
||||
3. **Let the AI analyze and generate**
|
||||
- The MCP server retrieves the layer tree
|
||||
- The AI understands component structure and style properties
|
||||
- It generates React or Vue components with more accurate names and structure
|
||||
|
||||
4. **Iterate**
|
||||
|
||||
```text
|
||||
User: Please extract the button into a reusable component
|
||||
|
||||
AI: I identified the Button component from the design system via MCP and I am generating a reusable React component with props...
|
||||
```
|
||||
|
||||
### 4.4 Why MCP is powerful
|
||||
|
||||
| Feature | Traditional approach | MCP approach |
|
||||
|---------|----------------------|--------------|
|
||||
| **Data accuracy** | Based on screenshots, may lose detail | Reads the original design data directly |
|
||||
| **Component recognition** | The model has to guess boundaries | Exact component definitions are available |
|
||||
| **Style fidelity** | Estimated from pixels | Reads exact design tokens |
|
||||
| **Iteration speed** | Re-screenshot after every change | Design changes can be synced directly |
|
||||
| **Automation** | Copy and paste manually | Can write directly into project files |
|
||||
|
||||
### 4.5 MCP tools available today
|
||||
|
||||
**Design-side MCP tools**
|
||||
|
||||
- **Figma MCP Server**: official MCP support for Figma
|
||||
- **MasterGo MCP**: community-built MasterGo adapter
|
||||
|
||||
**Development-side MCP tools**
|
||||
|
||||
- **Claude Code**: native MCP support
|
||||
- **Cline**: VS Code extension with MCP support
|
||||
- **Trae**: can enable MCP through configuration
|
||||
|
||||
::: tip Looking ahead
|
||||
The MCP ecosystem is evolving quickly. Over time, design tools and development environments will become much more tightly integrated, and one-click design-to-code workflows will likely become far more common.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## 5. What to do after exporting code
|
||||
|
||||
### 5.1 Test locally
|
||||
|
||||
Once you have the code, open it in your local IDE and test it:
|
||||
|
||||
1. **Create or open a project**
|
||||
|
||||
```bash
|
||||
# For plain HTML, open it directly in the browser
|
||||
open index.html
|
||||
|
||||
# For React/Vue projects
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
2. **Collaborate with your AI IDE**
|
||||
- Import the generated code into Trae or another AI IDE
|
||||
- Ask AI to help fix layout issues or add interactions
|
||||
|
||||
### 5.2 Common issues
|
||||
|
||||
| Stage | Problem | Solution |
|
||||
|-------|---------|----------|
|
||||
| Layout | Elements are misaligned | Check `display`, `position`, and container structure |
|
||||
| Styles | Colors do not match | Use browser devtools to inspect the actual applied values |
|
||||
| Responsive behavior | Mobile layout breaks | Add or refine media-query breakpoints |
|
||||
| Interaction | Buttons do nothing | Check JavaScript event bindings |
|
||||
|
||||
---
|
||||
|
||||
## 6. How to choose between the three paths
|
||||
|
||||
### 6.1 Comparison
|
||||
|
||||
| Dimension | Path 1: Multimodal AI | Path 2: Platform features | Path 3: MCP |
|
||||
|-----------|------------------------|---------------------------|-------------|
|
||||
| **Setup difficulty** | ⭐ Easy | ⭐⭐ Moderate | ⭐⭐⭐ More complex |
|
||||
| **Fidelity** | ⭐⭐⭐ Medium | ⭐⭐⭐⭐ High | ⭐⭐⭐⭐⭐ Highest |
|
||||
| **Flexibility** | ⭐⭐⭐⭐⭐ High | ⭐⭐⭐ Medium | ⭐⭐⭐⭐ Fairly high |
|
||||
| **Automation** | ⭐⭐ Low | ⭐⭐⭐ Medium | ⭐⭐⭐⭐⭐ High |
|
||||
| **Cost** | Low | Medium | Low |
|
||||
|
||||
### 6.2 Recommendations
|
||||
|
||||
**Choose Path 1 if**
|
||||
|
||||
- You need to validate an idea quickly
|
||||
- Your design tools change often
|
||||
- Perfect fidelity is not critical
|
||||
- Your budget is limited
|
||||
|
||||
**Choose Path 2 if**
|
||||
|
||||
- Your team mainly uses Figma or MasterGo
|
||||
- You need high-fidelity output
|
||||
- Designers and developers collaborate frequently
|
||||
- You are willing to pay for Pro tooling when needed
|
||||
|
||||
**Choose Path 3 if**
|
||||
|
||||
- You want the highest degree of automation
|
||||
- You have the technical ability to configure MCP
|
||||
- The project iterates from design to code frequently
|
||||
- You want a standardized design-development workflow
|
||||
|
||||
---
|
||||
|
||||
## 7. Summary
|
||||
|
||||
In this chapter, you learned the three core paths from design prototype to code:
|
||||
|
||||
1. **Direct multimodal AI conversion**: flexible and fast, ideal for early validation
|
||||
2. **Platform-native capabilities**: higher fidelity and a better fit for professional design workflows
|
||||
3. **MCP protocol integration**: the most automated path, and likely the direction of future workflows
|
||||
|
||||
::: tip Best Practices
|
||||
- **If you are new**: start with Path 1 for speed
|
||||
- **For team collaboration**: use Path 2 to preserve design consistency
|
||||
- **For maximum efficiency**: experiment with Path 3 and build an automated workflow
|
||||
- **Use them together**: switch between paths depending on the project stage
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- [Figma and MasterGo Basics](../2.1-figma-mastergo/)
|
||||
- [Let's Build Hogwarts Portraits](../2.5-hogwarts-portraits/)
|
||||
- [MCP Official Documentation](https://modelcontextprotocol.io/)
|
||||
- [Figma Make Documentation](https://help.figma.com/hc/en-us/sections/360007453634-Figma-Make)
|
||||
- [MasterGo AI Tutorials](https://mastergo.com/tutorials)
|
||||
@@ -0,0 +1,465 @@
|
||||
# Upgrade Your Interface with Modern Component Libraries
|
||||
|
||||
In previous lessons, you already learned how to design interfaces with design tools, turn designs into code with an AI IDE, and even complete a full frontend project. But you may have noticed one issue: when you build buttons, forms, and modals from scratch, they work, but they still feel a bit short of a "professional product" - styles are not consistent enough, interaction details are not smooth enough, and adapting to different screens is painful.
|
||||
|
||||
This is exactly the problem that **component libraries** solve.
|
||||
|
||||
A component library is a collection of pre-designed and pre-built UI building blocks. Buttons, inputs, dropdown menus, dialogs, tables... these interface elements appear repeatedly in almost every product. A component library has already built and polished them for you through large-scale real usage. You just combine them like Lego bricks and can quickly build a professional-grade interface.
|
||||
|
||||
## What You Will Learn
|
||||
|
||||
1. Understand what a frontend component library is, and why modern development almost always uses one
|
||||
2. Learn four representative component libraries and the scenarios each one is best at
|
||||
3. Through three practical scenarios (landing page, product page, admin dashboard), learn how to do Vibe Coding with an AI IDE and a component library
|
||||
4. Learn how to read component-library docs so you can find suitable components and use them correctly
|
||||
|
||||
## 1. Why Do We Need Component Libraries?
|
||||
|
||||
Imagine furnishing a home. You could build a chair yourself from raw wood, but the common approach is to buy one from IKEA - good design, stable quality, clear instructions, and you just assemble it at home.
|
||||
|
||||
Component libraries are the "IKEA" of frontend development. What they provide is not furniture, but interface parts:
|
||||
|
||||
| Hand-coding everything | Using a component library |
|
||||
| :--- | :--- |
|
||||
| You handle styling, interactions, and animation yourself | Ready out of the box, with polished styles and interactions |
|
||||
| Buttons may look different across pages | Unified global style and automatic consistency |
|
||||
| Mobile/tablet adaptation needs extra work | Most component libraries already include responsive support |
|
||||
| Accessibility is easy to miss | Professional libraries already handle keyboard navigation, screen readers, and more |
|
||||
| Slower development | Faster development, more focus on business logic |
|
||||
|
||||
In short: **component libraries let you spend time on "what to build" instead of "how to draw it."**
|
||||
|
||||
### See It Clearly: Same Requirement, With vs. Without a Component Library
|
||||
|
||||
Words alone are not convincing. In Trae, we can submit almost the same requirement twice: once without specifying a library, and once with one. Then compare the generated results.
|
||||
|
||||
**Prompt 1: without a component library**
|
||||
|
||||
```text
|
||||
Please help me build a data dashboard page for an AI writing assistant, including:
|
||||
- a top title bar and an export button
|
||||
- four statistic cards showing user count, active users, document count, and revenue, with trend changes
|
||||
- one line chart and one pie chart
|
||||
- a user list table with pagination
|
||||
- a left navigation sidebar
|
||||
```
|
||||
|
||||
Result when run directly in Trae:
|
||||
|
||||
<!-- TODO: Replace with a screenshot of a dashboard generated in Trae without a component library -->
|
||||
<!--  -->
|
||||
|
||||
**Prompt 2: use the shadcn/ui component library**
|
||||
|
||||
```text
|
||||
Please help me build a data dashboard page for an AI writing assistant using the shadcn/ui component library, including:
|
||||
- a top title bar and an export button
|
||||
- four statistic cards showing user count, active users, document count, and revenue, with trend changes
|
||||
- one line chart and one pie chart
|
||||
- a user list table with pagination
|
||||
- a left navigation sidebar
|
||||
```
|
||||
|
||||
Result when run directly in Trae:
|
||||
|
||||
<!-- TODO: Replace with a screenshot of a dashboard generated in Trae with shadcn/ui -->
|
||||
<!--  -->
|
||||
|
||||
Same requirement. The only difference is that the second prompt names the `shadcn/ui` component library. But the generated result jumps to a completely different level in visual consistency, interaction detail, and overall polish. That is the "free upgrade" component libraries bring - you only need to add one library name to your prompt.
|
||||
|
||||
## 2. Get to Know Four Core Component Libraries
|
||||
|
||||
There are many component libraries (full list in the [appendix](#appendix-more-component-libraries)), but to get started you only need to understand these four representative ones:
|
||||
|
||||
| Component Library | Framework | One-line Positioning | Website |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| [Ant Design](https://ant.design) | React | Produced by Ant Group; the de facto standard for enterprise back-office systems, with very broad component coverage | ant.design |
|
||||
| [shadcn/ui](https://ui.shadcn.com) | React | No big npm package install; copy component code directly into your project, built on Tailwind CSS, with maximum customization freedom | ui.shadcn.com |
|
||||
| [HeroUI](https://heroui.com) (formerly NextUI) | React | Beautiful default styles and smooth animation; great for visually demanding landing pages and product showcases | heroui.com |
|
||||
| [Material UI](https://mui.com) | React | The most established React component library, implementing Google Material Design, with the most mature ecosystem | mui.com |
|
||||
|
||||
> Vue users also have rich options: [Element Plus](https://element-plus.org) (most popular in China), [Ant Design Vue](https://antdv.com), [Naive UI](https://www.naiveui.com), etc. See the [appendix](#appendix-more-component-libraries).
|
||||
|
||||
Different libraries are good at different scenarios. Next, through three real development scenarios, you will experience how to do Vibe Coding with an AI IDE and component libraries.
|
||||
|
||||
To show different styles and strengths, we intentionally use a different library in each scenario. But note: **this is only to let you see more options**. In real projects, you can absolutely stick to one library you like most. For example, if you like shadcn/ui, you can use it for landing pages, product pages, and admin systems. Pick one that looks good to you and feels comfortable to use - that matters most.
|
||||
|
||||
## 3. Scenario One: Build a Product Landing Page with HeroUI
|
||||
|
||||
**Scenario**: You built an AI writing assistant and need a beautiful landing page to show product features and attract user sign-ups. The landing page should have strong visual impact, smooth animation, and good mobile appearance.
|
||||
|
||||
**Why HeroUI**: HeroUI has very polished default styles and smooth transitions, which makes it ideal for user-facing showcase pages.
|
||||
|
||||
### 3.1 Create the Project
|
||||
|
||||
```bash
|
||||
# Use the official HeroUI CLI
|
||||
npx create-heroui-app@latest ai-writer-landing
|
||||
cd ai-writer-landing
|
||||
npm install
|
||||
```
|
||||
|
||||
<!-- TODO: Replace with HeroUI homepage or component showcase screenshot -->
|
||||
<!--  -->
|
||||
|
||||
### 3.2 Generate the Landing Page with an AI IDE
|
||||
|
||||
Open your AI IDE (Cursor, Trae, etc.) and enter:
|
||||
|
||||
```text
|
||||
Please help me build a landing page for an AI writing assistant using the HeroUI component library:
|
||||
|
||||
**Page structure:**
|
||||
1. Top navigation bar: put Logo and product name on the left, three links "Features", "Pricing", "About" on the right, plus a "Get Started" button
|
||||
2. Hero section: main headline "Make AI your writing partner", subtitle introducing product value, two buttons "Try Free" and "View Demo", and a product screenshot below
|
||||
3. Feature section: three-column cards introducing "Smart Continuation", "Style Adjustment", and "Multilingual Translation"; each card should have icon, title, and description
|
||||
4. Pricing section: three pricing cards (Free, Pro, Team), with Pro highlighted as recommended
|
||||
5. Bottom CTA: one compelling line of copy and a signup button
|
||||
6. Footer: copyright information and social media links
|
||||
|
||||
**Design requirements:**
|
||||
- modern and professional look
|
||||
- support dark mode
|
||||
- should also look good on mobile
|
||||
```
|
||||
|
||||
<!-- TODO: Replace with screenshot of AI IDE generation process or generated result -->
|
||||
<!--  -->
|
||||
|
||||
### 3.3 Key Components the AI Will Use
|
||||
|
||||
In the code generated by AI, you will see these HeroUI components:
|
||||
|
||||
```jsx
|
||||
import {
|
||||
Navbar, NavbarBrand, NavbarContent, NavbarItem,
|
||||
Button,
|
||||
Card, CardHeader, CardBody, CardFooter,
|
||||
Divider,
|
||||
Link,
|
||||
Chip
|
||||
} from '@heroui/react'
|
||||
```
|
||||
|
||||
Role of each component:
|
||||
|
||||
| Component | Usage | Position in the landing page |
|
||||
| :--- | :--- | :--- |
|
||||
| `Navbar` | Top navigation bar | Top of the page, fixed |
|
||||
| `Button` | Buttons with multiple variants and colors | CTA buttons, nav buttons |
|
||||
| `Card` | Card container | Feature cards, pricing cards |
|
||||
| `Chip` | Small badge/label | "Recommended", "Most Popular" markers |
|
||||
| `Divider` | Separator line | Visual separation between sections |
|
||||
|
||||
### 3.4 Iteration and Refinement
|
||||
|
||||
The first generated version may not be perfect. Continue the conversation with AI:
|
||||
|
||||
```text
|
||||
Please help me improve the landing page:
|
||||
|
||||
1. Add a gradient color to the main headline, from blue to purple
|
||||
2. Add a hover lift animation to feature cards
|
||||
3. Highlight the Pro pricing card with a border and a "Most Popular" badge
|
||||
4. On mobile, change the nav bar to a hamburger menu (three horizontal lines)
|
||||
```
|
||||
|
||||
<!-- TODO: Replace with screenshot of the iterated landing page -->
|
||||
<!--  -->
|
||||
|
||||
> **Core idea of Vibe Coding**: You do not need to memorize every component API. Just describe the effect you want in natural language, and AI will choose suitable components and implementation. If something is not ideal, continue iterating in conversation.
|
||||
|
||||
## 4. Scenario Two: Build a Product Interface with shadcn/ui
|
||||
|
||||
**Scenario**: Your AI writing assistant needs a logged-in main interface - document list on the left, editor on the right, toolbar on top. This is a functional product page that needs highly customizable UI.
|
||||
|
||||
**Why shadcn/ui**: shadcn/ui puts component code directly into your project, so you can modify any detail freely. For deeply customized product interfaces, this "own the code" model is the most flexible.
|
||||
|
||||
<!-- TODO: Replace with shadcn/ui homepage or component showcase screenshot -->
|
||||
<!--  -->
|
||||
|
||||
### 4.1 Create the Project
|
||||
|
||||
```bash
|
||||
# Create a Next.js project
|
||||
npx create-next-app@latest ai-writer-app --typescript --tailwind --app
|
||||
cd ai-writer-app
|
||||
|
||||
# Initialize shadcn/ui
|
||||
npx shadcn@latest init
|
||||
|
||||
# Add components on demand (do not install everything at once)
|
||||
npx shadcn@latest add button card input sidebar sheet dialog dropdown-menu
|
||||
```
|
||||
|
||||
The unique part of shadcn/ui: each time you `add` a component, it copies source code into your project's `components/ui/` directory. You can open these files and edit styles and behavior directly.
|
||||
|
||||
### 4.2 Generate the Product Interface with an AI IDE
|
||||
|
||||
```text
|
||||
Please help me build the main interface of an AI writing assistant using the shadcn/ui component library:
|
||||
|
||||
**Overall layout:**
|
||||
- Left side: a collapsible sidebar, about 280px wide:
|
||||
- Put a "New Document" button at the top
|
||||
- Below is a document list; each document shows title and last edited time
|
||||
- Right-click on a document should allow rename or delete
|
||||
- Right side: main editor area, split into upper and lower parts:
|
||||
- Top toolbar: editable document title, word count, "AI Continue" button, and an "Export" dropdown
|
||||
- Bottom editor area: one large text input filling remaining space
|
||||
|
||||
**Interaction details:**
|
||||
- After clicking "AI Continue", the button shows loading state, and AI-generated text appears at the bottom of the editor (shown character by character like a typewriter)
|
||||
- On mobile, the sidebar becomes a drawer that slides in from the left
|
||||
- The currently selected document should be highlighted
|
||||
```
|
||||
|
||||
<!-- TODO: Replace with screenshot of AI-generated shadcn/ui product interface -->
|
||||
<!--  -->
|
||||
|
||||
### 4.3 Key Components the AI Will Use
|
||||
|
||||
```tsx
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Card, CardContent, CardHeader } from '@/components/ui/card'
|
||||
import {
|
||||
DropdownMenu,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuTrigger
|
||||
} from '@/components/ui/dropdown-menu'
|
||||
import {
|
||||
Sheet,
|
||||
SheetContent,
|
||||
SheetTrigger
|
||||
} from '@/components/ui/sheet'
|
||||
import {
|
||||
Sidebar,
|
||||
SidebarContent,
|
||||
SidebarHeader
|
||||
} from '@/components/ui/sidebar'
|
||||
```
|
||||
|
||||
| Component | Usage | Position in the product page |
|
||||
| :--- | :--- | :--- |
|
||||
| `Sidebar` | Collapsible sidebar | Left document list |
|
||||
| `Sheet` | Mobile drawer | Mobile replacement for sidebar |
|
||||
| `DropdownMenu` | Dropdown menu | "Export" button, right-click menu |
|
||||
| `Dialog` | Dialog | Rename and delete confirmation |
|
||||
| `Button` | Button, supports variants and loading | Various action buttons |
|
||||
| `Input` | Input field | Document title editing |
|
||||
|
||||
### 4.4 Customize Component Styles
|
||||
|
||||
The advantage of shadcn/ui is that you can modify component source code directly. For example, if you want larger button corner radius:
|
||||
|
||||
```text
|
||||
Please edit components/ui/button.tsx,
|
||||
change all default button radius from rounded-md to rounded-xl,
|
||||
and add a subtle shadow effect to the primary variant.
|
||||
```
|
||||
|
||||
AI will directly modify component files in your project, instead of overriding npm package styles - this is the value of shadcn/ui "code ownership."
|
||||
|
||||
<!-- TODO: Replace with screenshot showing shadcn/ui component source files directly editable in project -->
|
||||
<!--  -->
|
||||
|
||||
## 5. Scenario Three: Build an Admin Dashboard with Ant Design
|
||||
|
||||
**Scenario**: After your AI writing assistant launches, you need an admin backend to inspect user data, manage document content, and process paid orders. The core of admin systems is data display and operation efficiency.
|
||||
|
||||
**Why Ant Design**: Ant Design has the longest track record in back-office systems. Tables, forms, charts, and other business components work out of the box, and many enterprise interaction patterns (batch actions, advanced filters, data export, etc.) are built in.
|
||||
|
||||
<!-- TODO: Replace with Ant Design homepage or Pro Components showcase screenshot -->
|
||||
<!--  -->
|
||||
|
||||
### 5.1 Create the Project
|
||||
|
||||
```bash
|
||||
# Use Ant Design Pro scaffolding (built-in layout, routing, permissions)
|
||||
npx create-umi@latest ai-writer-admin
|
||||
# Choose the Ant Design Pro template
|
||||
cd ai-writer-admin
|
||||
npm install
|
||||
```
|
||||
|
||||
Or start from scratch:
|
||||
|
||||
```bash
|
||||
npx create-react-app ai-writer-admin --template typescript
|
||||
cd ai-writer-admin
|
||||
npm install antd @ant-design/icons @ant-design/pro-components
|
||||
```
|
||||
|
||||
### 5.2 Generate the Admin Backend with an AI IDE
|
||||
|
||||
```text
|
||||
Please help me build an admin backend for an AI writing assistant using the Ant Design component library:
|
||||
|
||||
**Overall layout:**
|
||||
- Left side menu: Dashboard, User Management, Document Management, Order Management, System Settings
|
||||
- Top area shows breadcrumb navigation
|
||||
|
||||
**User Management page:**
|
||||
- Top area has four stats cards: total users, today's new users, active users, paid users
|
||||
- Search/filter area: search by username, select registration time range, filter by user status, plus "Search" and "Reset" buttons
|
||||
- User table:
|
||||
- Show avatar, username, email, registration time, subscription plan (distinguished by different tag colors), status, operations
|
||||
- 20 rows per page, with pagination
|
||||
- Support batch selection, batch disable, or export
|
||||
- Operation column: view details, edit, disable (disable requires secondary confirmation)
|
||||
- Clicking "View Details" opens a right-side drawer showing detailed user information and recent document list
|
||||
```
|
||||
|
||||
<!-- TODO: Replace with screenshot of AI-generated Ant Design admin interface -->
|
||||
<!--  -->
|
||||
|
||||
### 5.3 Key Components the AI Will Use
|
||||
|
||||
```tsx
|
||||
import { PageContainer, ProLayout } from '@ant-design/pro-components'
|
||||
import { ProTable } from '@ant-design/pro-components'
|
||||
import { StatisticCard } from '@ant-design/pro-components'
|
||||
import {
|
||||
Button, Tag, Badge, Space, Drawer,
|
||||
Popconfirm, message, Modal
|
||||
} from 'antd'
|
||||
import {
|
||||
UserOutlined, SearchOutlined, ExportOutlined
|
||||
} from '@ant-design/icons'
|
||||
```
|
||||
|
||||
| Component | Usage | Position in backend |
|
||||
| :--- | :--- | :--- |
|
||||
| `ProLayout` | Overall admin layout framework | Page skeleton (menu + content area) |
|
||||
| `ProTable` | Advanced table with built-in search, pagination, column settings | User list, document list, order list |
|
||||
| `StatisticCard` | Data statistic card | Dashboard and page-top overview |
|
||||
| `Tag` / `Badge` | Status tags | Subscription plans, user status |
|
||||
| `Drawer` | Side drawer | User details, edit forms |
|
||||
| `Popconfirm` | Confirmation popover | Dangerous actions like delete/disable |
|
||||
|
||||
### 5.4 Keep Iterating: Add a Dashboard
|
||||
|
||||
```text
|
||||
Please help me build a dashboard page:
|
||||
|
||||
1. Top four statistic cards: total users, total documents, today's API calls, monthly revenue. Each card should show value and period-over-period change (up or down)
|
||||
2. Put two charts in the middle:
|
||||
- Left: user growth line chart for the last 7 days
|
||||
- Right: pie chart of subscription plan distribution
|
||||
3. Bottom: recent operation log table, showing time, user, operation type, details
|
||||
|
||||
Use Ant Design components for layout, and you can use Ant Design Charts for charts.
|
||||
```
|
||||
|
||||
<!-- TODO: Replace with screenshot of dashboard page -->
|
||||
<!--  -->
|
||||
|
||||
> **Vibe Coding tip for admin systems**: Admin page structures are relatively fixed (table + search + modal), so they are perfect for batch generation with AI. You can first ask AI to generate one "User Management" page as a template, then say "Based on the same structure, generate a Document Management page." AI will reuse the same layout pattern.
|
||||
|
||||
## 6. Learn to Read Docs: The "Manual" of Component Libraries
|
||||
|
||||
In Vibe Coding, AI writes most code for you. But when the generated result is not correct, or when you want to fine-tune component behavior, **reading the docs** is the fastest way to solve it.
|
||||
|
||||
Take Ant Design as an example. Its docs URL is: `https://ant.design/components/overview`
|
||||
|
||||
Standard docs workflow:
|
||||
|
||||
1. **Clarify the need**: for example, "I need row selection in a table."
|
||||
2. **Search in docs**: search "Table" and enter the table component page
|
||||
3. **Check examples**: each component has multiple live examples; find the "selectable rows" example
|
||||
4. **Copy code**: copy the example code into your project
|
||||
5. **Check API table**: at the bottom of the page, find the full config for `rowSelection`
|
||||
|
||||
> You can also send docs links directly to your AI IDE: "Please refer to the rowSelection API in https://ant.design/components/table and help me add batch selection to the user table." Giving AI the docs link makes the generated code more accurate.
|
||||
|
||||
Quick docs links for each library:
|
||||
|
||||
| Component Library | Docs URL |
|
||||
| :--- | :--- |
|
||||
| Ant Design | `https://ant.design/components/overview` |
|
||||
| shadcn/ui | `https://ui.shadcn.com/docs/components` |
|
||||
| HeroUI | `https://heroui.com/docs/components` |
|
||||
| Material UI | `https://mui.com/material-ui/all-components/` |
|
||||
| Element Plus | `https://element-plus.org/en-US/component/overview.html` |
|
||||
|
||||
## 7. Summary
|
||||
|
||||
The three practical scenarios cover the most common frontend development needs:
|
||||
|
||||
| Scenario | Recommended component library | Core strengths |
|
||||
| :--- | :--- | :--- |
|
||||
| Landing page / showcase page | HeroUI | Beautiful default styles, smooth animation, strong visual impact |
|
||||
| Product functional page | shadcn/ui | Full code control, flexible deep customization |
|
||||
| Admin system | Ant Design | Rich business components, tables/forms ready out of the box |
|
||||
|
||||
Vibe Coding workflow summary:
|
||||
|
||||
1. Choose a suitable component library based on scenario
|
||||
2. Use an AI IDE to describe the page structure and interactions you want
|
||||
3. AI generates a first version of the code, and you preview the result
|
||||
4. Continue iterating with natural language
|
||||
5. When details get stuck, read component-library docs
|
||||
|
||||
### Practice
|
||||
|
||||
Pick one of the scenarios below and complete it from scratch with an AI IDE and a component library:
|
||||
|
||||
1. Use HeroUI to build a showcase landing page for a project you built earlier (for example, Hogwarts Portraits)
|
||||
2. Use shadcn/ui to build the main interface for a note app (sidebar + editor)
|
||||
3. Use Ant Design to build a simple content-management backend (article list + new-article form)
|
||||
|
||||
---
|
||||
|
||||
## Appendix: More Component Libraries
|
||||
|
||||
Besides the four core libraries covered in the main text, the frontend ecosystem has many excellent component libraries. Below they are grouped by framework to help you choose by project needs.
|
||||
|
||||
### Vue Ecosystem
|
||||
|
||||
| Component Library | Stars | Description | Suitable Scenarios |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| [Element Plus](https://element-plus.org) | ~27k | Vue 3 enterprise component library from the Ele.me team, most widely used in China, excellent Chinese ecosystem | Back-office admin systems |
|
||||
| [Vuetify](https://vuetifyjs.com) | ~41k | Most popular Vue Material Design component library, 80+ components, complete docs | Google-design-style projects |
|
||||
| [Ant Design Vue](https://antdv.com) | ~21k | Vue 3 component library based on Ant Design system, unified design specification | Enterprise back-office systems |
|
||||
| [Naive UI](https://www.naiveui.com) | ~18k | Written in TypeScript, highly theme-customizable, no CSS preprocessor dependency | Projects with unique design needs |
|
||||
| [Quasar](https://quasar.dev) | ~27k | One codebase for SPA, SSR, PWA, mobile, and desktop apps | Cross-platform projects |
|
||||
| [Vant](https://vant-ui.github.io/vant) | ~24k | Lightweight mobile component library from Youzan, covering common e-commerce needs | Mobile H5 pages |
|
||||
| [PrimeVue](https://primevue.org) | ~14k | 90+ components, multiple themes (Material, Bootstrap, etc.) | Projects needing rich components and multi-theme support |
|
||||
| [Arco Design Vue](https://arco.design/vue) | ~3k | Produced by ByteDance, high component quality, built-in dark mode | Back-office products |
|
||||
| [TDesign Vue Next](https://tdesign.tencent.com/vue-next) | ~2k | Produced by Tencent, unified design language, covers common desktop scenarios | Tencent ecosystem or enterprise projects |
|
||||
|
||||
### React Ecosystem
|
||||
|
||||
| Component Library | Stars | Description | Suitable Scenarios |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| [Material UI (MUI)](https://mui.com) | ~95k | Long-established implementation of Google Material Design, most complete components, most mature ecosystem | Rapid enterprise app building |
|
||||
| [Ant Design](https://ant.design) | ~94k | Produced by Ant Group, many high-quality business components, dominant among Chinese developers | Enterprise back-office systems |
|
||||
| [shadcn/ui](https://ui.shadcn.com) | ~83k | Copy code into project instead of npm install, based on Radix UI + Tailwind CSS, fully controllable | Highly customized projects |
|
||||
| [Chakra UI](https://chakra-ui.com) | ~39k | Focus on developer experience, concise API, built-in accessibility support | Rapid prototype development |
|
||||
| [Mantine](https://mantine.dev) | ~28k | 100+ components and 50+ hooks, including advanced components like date pickers and rich text editors | Teams needing an all-in-one out-of-the-box solution |
|
||||
| [Headless UI](https://headlessui.com) | ~27k | Unstyled component library from Tailwind Labs, supports both React and Vue | Best with Tailwind CSS |
|
||||
| [HeroUI](https://heroui.com) | ~24k | Based on Tailwind CSS + React Aria, beautiful defaults, smooth animation | Projects pursuing visual quality |
|
||||
| [Radix UI](https://www.radix-ui.com) | ~17k | Unstyled primitive component library focused on accessibility and behavior; foundational layer of shadcn/ui | Building custom design systems |
|
||||
|
||||
#### shadcn/ui Extension Ecosystem
|
||||
|
||||
Beyond the general component libraries above, the shadcn/ui ecosystem has also produced many extension libraries based on the same philosophy, offering differentiated choices for specific scenarios. These extensions also use the "copy code into project" model, giving developers full source-code control.
|
||||
|
||||
| Component Library | Description | Suitable Scenarios |
|
||||
| :--- | :--- | :--- |
|
||||
| [Aceternity UI](https://ui.aceternity.com) | 200+ production-grade components, featuring glow cards, gradient text, 3D earth, and other signature visual components | High-polish landing pages, SaaS products |
|
||||
| [Tailark UI](https://tailark.com) | Collection of marketing website blocks, including commonly used modules such as product showcases, testimonials, and CTA buttons | Marketing landing pages, product websites |
|
||||
| [UI Tripled](https://ui.tripled.work) | Dynamic interaction components based on Framer Motion, including modal, navigation, card animation | Creative tools, personal portfolios |
|
||||
| [Neobrutalism UI](https://neobrutalism.dev) | Neo-brutalism style with thick lines, high contrast, and bold colors | Personalized brand websites, creative projects |
|
||||
| [REUI](https://reui.io) | 967+ component composition patterns from real business scenarios | Enterprise backends, complex forms |
|
||||
| [Cult UI](https://cult-ui.com) | More refined interaction and visual polish, including compound components like data tables and filter panels | High-quality commercial products |
|
||||
| [Kibo UI](https://kibo-ui.com) | Advanced business components such as color picker, rich text editor, file upload | Admin systems, tool products |
|
||||
| [Kokonut UI](https://kokonutui.com) | 100+ components + 7+ complete templates, fresh and minimalist style | SaaS sites, blogs, e-commerce |
|
||||
| [Commerce UI](https://ui.stackzero.co) | Specialized for e-commerce scenarios, including product cards, shopping cart, checkout forms | E-commerce platforms |
|
||||
| [shadcnblocks](https://shadcnblocks.com) | 1373 UI blocks + 13 complete templates, most comprehensive resources | All scenarios |
|
||||
| [Shoogle](https://shoogle.dev) | Aggregated search platform for shadcn/ui ecosystem | Quickly finding resources |
|
||||
| [Discover All Shadcn](https://allshadcn.com) | Aggregated resource navigation | Quickly finding resources |
|
||||
|
||||
> **Why choose shadcn/ui extensions?** These extensions inherit the shadcn/ui "code ownership" philosophy, while adding deep customization for specific scenarios. In the Vibe Coding era, they help you quickly find components that match your design goals, break away from homogenized mainstream UI patterns, and build more differentiated products.
|
||||
+49
-44
@@ -1,78 +1,88 @@
|
||||
# Full-Stack Development
|
||||
# Junior Developer
|
||||
|
||||
Welcome to the **Full-Stack Development** stage! Here, you will dive deep into full-stack development, mastering frontend componentization, database design, backend API development, and deployment.
|
||||
Welcome to the **Junior Developer** stage! Here, you will go deeper into full-stack development and learn modern frontend workflows, database design, backend APIs, deployment, and AI-powered product building.
|
||||
|
||||
## What You Will Learn
|
||||
|
||||
### Frontend Development
|
||||
|
||||
Master modern frontend development and learn to use component libraries and design tools:
|
||||
Master modern frontend development and learn how to use design tools, component libraries, and AI-native UI workflows:
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Frontend 0: Using Lovart for Assets"
|
||||
description="Learn how to use AI tools like Lovart to quickly generate high-quality game assets and UI resources"
|
||||
href="/en/stage-2/frontend/2.0-lovart-assets/"
|
||||
title="Frontend 0: Build Your Own Asset-Production Agent with Lovart"
|
||||
description="Use Nanobanana and Lovart to batch-generate high-quality visual assets, then build a drawing agent with intent recognition"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
href="/en/stage-2/frontend/2.1-figma-mastergo/"
|
||||
title="Frontend 1: Figma & MasterGo Basics"
|
||||
description="Master the basic operations of professional UI design tools and the workflow from design to code"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Frontend 2: Building Your First Modern App - UI Design"
|
||||
description="Design a modern web application interface from scratch, practicing UI design principles"
|
||||
href="/en/stage-2/frontend/2.2-ui-design/"
|
||||
title="Frontend 2: Build Your First Modern App - UI Design"
|
||||
description="Learn the UI design foundations for modern applications"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Frontend 3: UI Design Guidelines & Multi-Product UI"
|
||||
href="/en/stage-2/frontend/2.3-multi-product-ui/"
|
||||
title="Frontend 3: UI Guidelines and Multi-Product Design"
|
||||
description="Learn mainstream UI design guidelines to improve product design consistency and aesthetics"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
href="/en/stage-2/frontend/2.4-llm-skills-beautiful/"
|
||||
title="Frontend 4: Make Interfaces Beautiful with LLMs and Skills"
|
||||
description="Use prompts and plugins in real projects to make AI generate more polished, distinctive interfaces"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-2/frontend/2.5-hogwarts-portraits/"
|
||||
title="Frontend 5: Let's Build Hogwarts Portraits"
|
||||
description="Practical project: Build an interactive Hogwarts portrait application using AI-generated images"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-2/frontend/2.6-design-to-code/"
|
||||
title="Frontend 6: From Design Prototype to Project Code"
|
||||
description="Learn how to turn design prototypes into frontend code that really runs in the browser"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-2/frontend/2.7-modern-component-library/"
|
||||
title="Frontend 7: Upgrade Your UI with Modern Component Libraries"
|
||||
description="Use component libraries to build professional interfaces faster"
|
||||
/>
|
||||
</NavGrid>
|
||||
|
||||
|
||||
### Backend & Full-Stack
|
||||
### Backend Development
|
||||
|
||||
Learn API design, database management, and application deployment strategies:
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Backend 1: What is API"
|
||||
description="Understand the core concept of APIs, the bridge between frontend and backend"
|
||||
href="/en/stage-2/backend/2.4-git-workflow/"
|
||||
title="Backend 1: Learn Git and GitHub"
|
||||
description="Master core version control operations and collaboration workflows with Git"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
href="/en/stage-2/backend/2.2-database-supabase/"
|
||||
title="Backend 2: From Database to Supabase"
|
||||
description="Master relational database basics and learn to use Supabase, a modern BaaS platform"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Backend 3: AI-Assisted Interface Code & Documentation"
|
||||
href="/en/stage-2/backend/2.3-ai-interface-code/"
|
||||
title="Backend 3: Backend API Design and Development"
|
||||
description="Use AI to assist in generating backend interface code and standard API documentation"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Backend 4: Git Workflow"
|
||||
description="Master core operations and collaboration workflows of the Git version control system"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Backend 5: Zeabur Deployment"
|
||||
href="/en/stage-2/backend/2.5-zeabur-deployment/"
|
||||
title="Backend 4: Ship Your Product Prototype"
|
||||
description="Learn to quickly deploy your full-stack applications to the cloud using Zeabur"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Backend 6: Modern CLI Development Tools"
|
||||
href="/en/stage-2/backend/2.6-modern-cli/"
|
||||
title="Backend 5: From IDEs to CLI AI Coding Tools"
|
||||
description="Explore modern CLI tools to enhance command-line development experience"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Backend 7: Integrating Stripe Payment Systems"
|
||||
href="/en/stage-2/backend/2.7-stripe-payment/"
|
||||
title="Backend 6: Integrate Stripe and Other Billing Systems"
|
||||
description="Practical: Integrate Stripe payment functionality into your application for monetization"
|
||||
/>
|
||||
</NavGrid>
|
||||
@@ -83,12 +93,12 @@ Learn API design, database management, and application deployment strategies:
|
||||
Consolidate your full-stack development skills through practical projects:
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Assignment 1: Building Your First Modern App - Full-Stack"
|
||||
href="/en/stage-2/assignments/2.1-fullstack-app/"
|
||||
title="Assignment 1: Build Your First Modern App - Full Stack"
|
||||
description="Comprehensively apply what you've learned to independently complete a fully functional full-stack application"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
href="/en/stage-2/assignments/2.2-modern-frontend-trae/"
|
||||
title="Assignment 2: Modern Frontend Component Library + Trae"
|
||||
description="Use modern component libraries with Trae IDE to efficiently build complex frontend interfaces"
|
||||
/>
|
||||
@@ -98,29 +108,24 @@ Consolidate your full-stack development skills through practical projects:
|
||||
### AI Capabilities Extension
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
href="/en/stage-2/ai-capabilities/2.1-dify-knowledge-base/"
|
||||
title="AI 1: Dify Basics & Knowledge Base Integration"
|
||||
description="Learn to build AI applications using Dify and integrate private knowledge bases"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="AI 2: AI Dictionary Query & Multimodal API Integration"
|
||||
description="Explore more AI capabilities, integrating vision, voice, and other multimodal APIs"
|
||||
/>
|
||||
</NavGrid>
|
||||
|
||||
|
||||
## Who Is This For
|
||||
|
||||
- Developers with some programming foundation who want to systematically learn full-stack development
|
||||
- Developers with some programming foundation who want to systematically learn modern full-stack development
|
||||
- Learners transitioning from product manager to full-stack engineer
|
||||
- Junior to intermediate developers who want to master modern development tools and workflows
|
||||
- Entrepreneurs who want to independently develop complete products
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Complete the "Novice & Product Prototype" stage, or have equivalent basic knowledge
|
||||
- Complete the "Novice & Product Prototype" stage, or have equivalent foundational knowledge
|
||||
- Understand basic HTML/CSS/JavaScript concepts
|
||||
- Have preliminary knowledge of AI programming tools
|
||||
- Have a basic understanding of AI coding tools
|
||||
|
||||
Ready to dive deep into full-stack development? Click the left navigation to begin learning!
|
||||
Ready to move from product prototype to real full-stack delivery? Use the left navigation to start learning.
|
||||
|
||||
@@ -0,0 +1,938 @@
|
||||
As large language models (LLMs) are adopted more widely, enterprises face a very practical problem: how can a model answer questions accurately when those questions depend on internal documents, real-time data, or domain-specific knowledge? After all, a model's training data is limited and time-bounded, so it cannot cover company-specific business knowledge or constantly updated information.
|
||||
|
||||
One intuitive idea is this: since context windows keep getting larger, from 8K to 128K and now beyond one million tokens, why not just stuff the relevant documents into the prompt and let the model answer from those materials directly?
|
||||
|
||||
However, being able to process long context and being able to deliver correct answers stably, efficiently, and controllably in enterprise scenarios are two very different things. Blindly relying on long context brings a series of severe challenges, including exploding cost, diluted attention, and knowledge that goes stale and is hard to keep up to date.
|
||||
|
||||
To solve these pain points, a technique called Retrieval-Augmented Generation, or RAG, emerged. Before the model generates an answer, RAG first retrieves precise external knowledge. Compared with simply expanding the context length in a brute-force way, RAG meets enterprise requirements for factual accuracy and fresh knowledge at lower cost, with higher accuracy and stronger controllability. It has therefore become a key foundation for building trustworthy AI applications.
|
||||
|
||||
In this tutorial, we will systematically explain what RAG is, trace the background behind its emergence and its core principles, and then explore its evolution from basic forms to advanced forms, along with where it may go next.
|
||||
|
||||
# What You Will Learn in This Lesson
|
||||
|
||||
- The core value of RAG: deeply understand how it addresses the central long-context problems of cost, attention, and knowledge freshness
|
||||
- How RAG works: see through concrete examples how it completes the full loop from retrieval to generation
|
||||
- The evolution of RAG: from basic Naive RAG to Advanced RAG and then to Modular RAG
|
||||
- Model selection for RAG: understand how to evaluate and choose the three key model types, Embedding, Rerank, and LLM
|
||||
- Enterprise RAG practice: learn the full-chain construction guide from data preprocessing to system deployment and evaluation
|
||||
- RAG evaluation and optimization: understand core metrics, mainstream frameworks, and continuous improvement methods
|
||||
- Frontier trends in RAG: explore how RAG is combining with agents, multimodality, and other emerging techniques
|
||||
|
||||
# What You Will Gain
|
||||
|
||||
After completing this tutorial, you will build a systematic beginner-level understanding of RAG technology. You will not only know what it is, but also why it works. You will also gain a clear blueprint for how to evaluate, choose, and design an efficient, reliable, and controllable RAG system that meets enterprise requirements, laying a solid foundation for building real enterprise-grade RAG applications.
|
||||
|
||||
# 1. Why RAG Is Needed
|
||||
|
||||
Retrieval-Augmented Generation (RAG) is one of the most important technical approaches in generative AI today. Its basic idea is simple: before asking a large model to generate an answer, the system first retrieves information related to the user's question from an external knowledge base, and then passes both the retrieved information and the original question to the model so the model can answer on top of real materials. That external knowledge base can be an enterprise's internal policies, process documents, and product knowledge, or an industry database, regulatory corpus, standards library, and so on.
|
||||
|
||||

|
||||
|
||||
At this point, a natural question appears: if large models can already "answer questions directly," why add another layer called Retrieval-Augmented Generation? Especially now that context windows are getting larger and larger, it can seem as if simply handing all relevant material to the model ought to solve most needs.
|
||||
|
||||
The real difference is that "being able to produce an answer" and "being able to continuously, stably, and controllably produce the right answer in a real business environment" are two completely different things. If you rely only on a model's parameter memory, or only on dumping large amounts of documents into a long context, at least three typical problems still appear in enterprise use.
|
||||
|
||||
1. Cost and efficiency problems:
|
||||
   Even as context windows keep expanding, the idea of dumping all documents into the context at once is still impractical in real systems. The central contradiction shows up in two places:
|
||||
   - Inference cost is strongly positively correlated with context length. The longer the context, the more inference cost rises, almost linearly and sometimes even superlinearly. For a single call, 8K tokens and 200K tokens live in completely different price and latency ranges, and long context has a much higher cost threshold.
|
||||
|
||||
   
|
||||
|
||||
   > Conceptually, context is the background information and conversation history the model "refers to" when answering a question. Technically, it is the full token sequence fed into the model for one inference: system and user instructions, message history, and retrieved passages.
|
||||
   >
|
||||
   > A "context window" is the capacity limit for that input. In mainstream large-model architectures today, such as Transformers, every token participates in attention computation at every layer. Because standard self-attention compares each token with every other token, attention compute grows roughly quadratically with sequence length, so cost and latency rise much faster than the token count as the window fills up.
|
||||
|
||||
   - A large amount of compute is wasted. Most tasks need only a very small amount of information that is highly relevant to the current question. Stuffing the full document set into the context creates serious idle and wasted computation, lowers system throughput, slows response speed, and eventually harms user experience.
|
||||
2. Attention and focus problems:
|
||||
   A large model may be able to "cover" ultra-long context, but it cannot use every segment with equal quality. Once context length crosses a certain threshold, the model begins to show obvious attention bias:
|
||||
   - Attention decay: the model's attention to early and middle parts of the context gradually weakens, and it tends to rely more on text it read later, so early critical information can be effectively ignored.
|
||||
   - Information interference: the model can easily be dragged off course by irrelevant, repetitive, or even conflicting information inside the context. The final answer may sound logically coherent while still drifting away from the core question, making accuracy hard to guarantee.
|
||||
   Without a retrieval stage to filter and rank relevance, the longer the context becomes, the harder it is to keep the answer focused on the truly key evidence. The advantage of long context can be fully canceled out by information interference.
|
||||
3. Knowledge freshness and controllability problems:
|
||||
   If all knowledge is stored entirely in model parameters, or manually copied into prompts, two unavoidable defects appear:
|
||||
   - Knowledge updates are difficult: once the knowledge changes, such as policy changes, product iterations, or price updates, you either need to retrain or fine-tune the model, which is costly and slow, or maintain prompt templates manually, which is also costly and prone to human error.
|
||||
   - Traceability is poor: when a model answers, it is often difficult to locate the exact pieces of evidence from either black-box parameters or long prompts. This makes compliance audits, risk explanations, and other tasks that require clear decision grounds extremely difficult.
|
||||
|
||||
Under these real constraints, the advantage of RAG becomes much clearer. Its core approach is to locate relevant and reliable information before generation, so the model answers only from necessary knowledge. Knowledge can be stored independently in an external knowledge base, making it easier to update and manage. At the same time, generated results can include cited sources, improving interpretability and trustworthiness. Even if context windows keep growing in the future, RAG will still enable efficient knowledge management and use at relatively low cost, supporting enterprise-grade knowledge applications whose process is observable and whose behavior is traceable.
|
||||
|
||||
From the perspective of enterprise requirements, compared with a traditional LLM that relies only on its internal parameters, RAG mainly solves the following real-world deployment problems:
|
||||
|
||||
1. Freshness:
|
||||
Traditional models usually do not know new regulations, products, or workflows that appeared after their training cutoff, but RAG can directly read the latest policy documents, business databases, and knowledge bases. Without frequent retraining, answers can stay synchronized with the latest business state.
|
||||
2. Specialization:
|
||||
In vertical domains such as healthcare, chemicals, or finance, general-purpose models often do not understand deeply enough or speak precisely enough. After connecting enterprise-owned domain documents and industry standards, answers can be grounded in authoritative materials and become much closer to real business practice.
|
||||
3. Hallucination:
|
||||
By requiring answers to stay grounded in retrieved passages and provide citations, the system can reduce unsupported fabrication at the mechanism level, making "sounds true" much closer to "is actually true."
|
||||
4. Explainability and auditability:
|
||||
Pure parameter-based models often cannot answer, "Which rule was this conclusion derived from?" RAG lets each answer be traced back to a specific policy clause, business document, or historical case. That helps business staff inspect and correct answers and gives audit, risk, and compliance teams the traceability they need.
|
||||
5. Compute cost and resource efficiency:
|
||||
Making a model memorize all enterprise knowledge in its parameters usually means a larger model and higher inference cost. RAG stores most knowledge outside the model in vector stores and document stores and retrieves it on demand, allowing enterprises to get broader coverage and more accurate detail even with smaller models and limited compute.
|
||||
|
||||
Therefore, for enterprises that want to use large models in real business scenarios over the long term, stably and controllably, RAG is not an optional enhancement. It is almost an essential foundational technology for building a high-quality enterprise knowledge application system.
|
||||
|
||||
# 2. What RAG Is
|
||||
|
||||
The core idea of RAG, Retrieval-Augmented Generation, is to let a large model answer questions not only with static knowledge learned during training, but also with up-to-date and reliable information pulled from an external knowledge base at runtime.
|
||||
|
||||
In a typical RAG system, the user's question is not sent directly to the large model. Instead, a retrieval module first finds the most relevant document passages from the enterprise knowledge base, then combines those passages with the original question into a complete context, and finally gives that to the model to generate an answer. This "retrieve first, generate second" pattern allows the model to reason from real reference material instead of only guessing from what it remembers in its parameters. We can look at a typical case:
|
||||
|
||||

|
||||
|
||||
1. Indexing stage
|
||||
|
||||
In the indexing stage, the system first processes raw material such as internal enterprise documents, web pages, and reports. It splits them into smaller semantic chunks, then uses an embedding model to generate vector representations for each chunk and builds an index. Later, when a user question arrives, the system can quickly find the most semantically similar chunks in vector space.
|
||||
|
||||
In the diagram, this corresponds to the purple "Indexing" area in the upper right. The path from "Documents" through "Chunks / Vectors" to "embeddings" shows documents being chunked, converted into vectors, and written into the index. More concretely:
|
||||
|
||||
- Documents are divided into a set of semantically coherent chunks, each of which may correspond to a short news passage, explanation, or analysis.
|
||||
- Each chunk is converted into a high-dimensional vector by the embedding model and stored in the vector index.
|
||||
- This index supports similarity-based retrieval later, preparing a knowledge base the system can consult when answering questions.
|
||||
|
||||
2. Retrieval stage plus answer generation from retrieved results
|
||||
|
||||
After the user asks a question, the system first retrieves relevant content from the index, then sends the question and retrieved text together to the large model to generate an answer. In the figure, reading the key areas from top to bottom and from right to left follows exactly this flow.
|
||||
|
||||
(1) User input question: the yellow Input - Query area
|
||||
|
||||
> "How do you evaluate the fact that OpenAI's CEO, Sam Altman, went through a sudden dismissal by the board in just three days, and then was rehired by the company, resembling a real-life version of 'Game of Thrones' in terms of power dynamics?"
|
||||
>
|
||||
> "How do you evaluate the fact that OpenAI CEO Sam Altman was suddenly dismissed by the board and then rehired by the company just three days later, making the power struggle resemble a real-life version of Game of Thrones?"
|
||||
|
||||
This large block of text is the content inside the "Query" box in the diagram, corresponding to the user's natural-language question. The system vectorizes that question and uses it to search the upper-right index for related document chunks.
|
||||
|
||||
(2) Retrieved relevant documents: the pink Relevant Documents area at the lower right
|
||||
|
||||
After retrieval, the system gets several document chunks most related to the question. In the diagram, they are shown as three chunks:
|
||||
|
||||
> "Sam Altman Returns to OpenAI as CEO, Silicon Valley Drama Resembles the 'Zhen Huan' Comedy"
|
||||
> "Sam Altman returns as OpenAI CEO, and this Silicon Valley drama resembles a court-intrigue comedy."
|
||||
>
|
||||
> "The Drama Concludes? Sam Altman to Return as CEO of OpenAI, Board to Undergo Restructuring"
|
||||
> "Is the drama ending? Sam Altman will return as CEO of OpenAI, while the board will be restructured."
|
||||
>
|
||||
> "The Personnel Turmoil at OpenAI Comes to an End: Who Won and Who Lost?"
|
||||
> "OpenAI's personnel turmoil comes to an end: who won and who lost?"
|
||||
|
||||
(3) Combine the prompt and generate the answer: the blue LLM / Combine Context and Prompts area
|
||||
|
||||
The system then combines the original user question and the retrieved chunks into a complete prompt and sends it to the model. The dashed box in the lower middle of the figure shows a prompt example:
|
||||
|
||||
> "Question:
|
||||
> How do you evaluate the fact that the OpenAI's CEO, ... dynamics?
|
||||
>
|
||||
> Please answer the above questions based on the following information:
|
||||
> Chunk 1:
|
||||
> Chunk 2:
|
||||
> Chunk 3:"
|
||||
>
|
||||
> "Question:
|
||||
> How do you evaluate the power struggle in the OpenAI CEO incident?
|
||||
>
|
||||
> Please answer the above question based on the information below:
|
||||
> Chunk 1:
|
||||
> Chunk 2:
|
||||
> Chunk 3:"
|
||||
|
||||
(4) Answer comparison with and without RAG: the gray and yellow Output - Answer areas in the lower left
|
||||
|
||||
Finally, the model generates an answer based on the provided information. The figure also compares outputs with and without RAG. Without RAG, the model has no external material and can only give a vague response, corresponding to the gray box:
|
||||
|
||||
> "... I am unable to provide comments on future events. Currently, I do not have any information regarding the dismissal and rehiring of OpenAI's CEO ..."
|
||||
|
||||
With RAG, the model can use the retrieved news and analysis to produce a much more informative answer, corresponding to the yellow box:
|
||||
|
||||
> "... This suggests significant internal disagreements within OpenAI regarding the company's future direction and strategic decisions. All of these twists and turns reflect power struggles and corporate governance issues within OpenAI ..."
|
||||
|
||||
The example above shows the full flow of a typical RAG system and helps us understand its core stages and how information moves through them. But many important technical details remain inside a black box: how exactly is vector matching performed, and how should the prompt be organized so the model can use the retrieved content more effectively? These details largely determine real RAG quality. Next, we will go deeper into RAG's internal mechanism and break it down step by step, from vectorization principles and similarity computation to prompt engineering.
|
||||
|
||||
# 3. How RAG Works
|
||||
|
||||
We can break it down through a simple question-answering example built on a knowledge base about "apple."
|
||||
|
||||
## 3.1 Document Vectorization Stage
|
||||
|
||||
Suppose we have a simplified knowledge base containing these three document passages:
|
||||
|
||||
1. Passage A: Apple Inc. was founded on April 1, 1976 by Steve Jobs, Steve Wozniak, and Ronald Wayne, and its headquarters are in Cupertino, California.
|
||||
2. Passage B: Apples are a fruit rich in vitamin C and dietary fiber, which helps digestion and immune-system health.
|
||||
3. Passage C: Apple Inc. launched the first iPhone in 2007, fundamentally changing the smartphone industry.
|
||||
|
||||
When we process these documents with an embedding model, such as OpenAI's `text-embedding-ada-002` or an open-source BGE model, each passage is converted into a high-dimensional vector, often with 768, 1024, or 1536 dimensions.
|
||||
|
||||
> A vector is essentially an array made of many numeric values. Intuitively, you can think of each dimension as capturing some semantic feature of the text: for example, the vector for "cat" might encode signals related to mammal, household pet, and furry. In real embedding models the individual dimensions are learned and usually not human-interpretable, but the overall combination of values captures the semantic meaning of the text so the computer can "understand" relationships between texts.
|
||||
|
||||
Simplified examples, with real vectors being much higher-dimensional:
|
||||
|
||||
- Vector for passage A, about Apple's founding: `[0.85, -0.23, 0.41, -0.56, 0.12, 0.78, ...]`
|
||||
- Vector for passage B, about apples as fruit: `[-0.12, 0.95, -0.34, 0.67, -0.89, 0.05, ...]`
|
||||
- Vector for passage C, about the iPhone launch: `[0.79, -0.18, 0.52, -0.61, 0.23, 0.81, ...]`
|
||||
|
||||
These vectors then need to be stored for later retrieval and recall, either in a vector database such as Pinecone or Weaviate, or in an index built with a similarity-search library such as FAISS.
|
||||
|
||||
> A database is a system that stores and manages data in a structured way, enabling organized storage and efficient retrieval. Common examples include contact lists and e-commerce product catalogs.
|
||||
>
|
||||
> A vector database is a specialized kind of database. Unlike traditional databases, which store text, tables, and other ordinary data structures, a vector database is designed specifically to store vectors, that is, high-dimensional numeric arrays, and it is optimized for similarity search in AI scenarios.
|
||||
|
||||
## 3.2 User Query, Retrieval, and Response Stage
|
||||
|
||||
Once the knowledge base has been vectorized and stored, a RAG system can support real-time user queries. When a user asks a question, the system executes a continuous flow: it first converts the question into a vector, then uses similarity computation to retrieve the most relevant information from the knowledge base, and finally uses those passages as the basis for answer generation. We can illustrate this process with three concrete queries.
|
||||
|
||||
### Query 1: "When was Apple Inc. founded?"
|
||||
|
||||
At the query-vectorization stage, the question is converted by the embedding model into a semantic vector, for example `[0.82, -0.21, 0.38, -0.58, 0.15, 0.76, ...]`. This numeric pattern is highly similar to the stored vector for passage A, the one about the company's founding.
|
||||
|
||||
The system then performs similarity retrieval, Top-K with K = 2, by computing cosine similarity between the query vector and all document vectors in the knowledge base. The result looks like this:
|
||||
|
||||
- Similarity with passage A, the founding passage: 0.97, highly relevant
|
||||
- Similarity with passage C, the iPhone launch passage: 0.88, relevant because it is also about the company
|
||||
- Similarity with passage B, the fruit nutrition passage: 0.12, almost irrelevant
|
||||
|
||||
> Top-K is a common selection strategy in vector retrieval. It means ranking all matches from highest to lowest similarity and keeping the top K results. K = 2 means the system retains only the top two document vectors by similarity and filters out lower-ranked ones, so the next stage generates the answer only from the two most relevant document passages.
|
||||
|
||||
The results filtered by similarity are called recall results. The system returns the Top-2 passages as evidence:
|
||||
|
||||
1. Passage A, similarity 0.97: "Apple Inc. was founded on April 1, 1976 by Steve Jobs, Steve Wozniak, and Ronald Wayne, and its headquarters are in Cupertino, California."
|
||||
2. Passage C, similarity 0.88: "Apple Inc. launched the first iPhone in 2007, fundamentally changing the smartphone industry."
|
||||
|
||||
At the answer-generation stage, the system builds a complete structured input by placing the recalled content inside the reference information section and sending it together with a system prompt:
|
||||
|
||||
```text
|
||||
[System Prompt]
|
||||
You are a professional question-answering assistant. Please answer strictly according to the "reference information" provided by the user.
|
||||
If the reference information contains the answer, answer directly based on it.
|
||||
If the reference information does not contain the answer, explicitly tell the user that "the question cannot be answered based on the currently available materials," and do not fabricate information.
|
||||
Please indicate which information point your answer is based on.
|
||||
|
||||
[Retrieved Context]
|
||||
Apple Inc. was founded on April 1, 1976 by Steve Jobs, Steve Wozniak, and Ronald Wayne, and its headquarters are in Cupertino, California.
|
||||
Apple Inc. launched the first iPhone in 2007, fundamentally changing the smartphone industry.
|
||||
|
||||
[User Query]
|
||||
When was Apple Inc. founded?
|
||||
```
|
||||
|
||||
After receiving this structured input, the LLM follows the system instruction and treats the retrieved context as the only trustworthy source for answering. Its final response would look like this:
|
||||
|
||||
> According to the provided reference information, Apple Inc. was founded on April 1, 1976. [Basis: Information 1]
|
||||
|
||||
### Query 2: "What are the benefits of eating apples?"
|
||||
|
||||
At the query-vectorization stage, this question is converted into a semantic vector such as `[-0.08, 0.92, -0.31, 0.71, -0.85, 0.08, ...]`. Its numerical pattern is highly similar to the stored vector for passage B, the one about apple nutrition.
|
||||
|
||||
The system again performs Top-K similarity retrieval with K = 2 and computes cosine similarity:
|
||||
|
||||
- Similarity with passage B, fruit nutrition: 0.95, highly relevant
|
||||
- Similarity with passage C, iPhone launch: 0.18, almost irrelevant
|
||||
- Similarity with passage A, company founding: 0.15, almost irrelevant
|
||||
|
||||
The system returns the Top-2 passages as evidence:
|
||||
|
||||
1. Passage B, similarity 0.95: "Apples are a fruit rich in vitamin C and dietary fiber, which helps digestion and immune-system health."
|
||||
2. Passage C, similarity 0.18: "Apple Inc. launched the first iPhone in 2007, fundamentally changing the smartphone industry." This is only weakly related and would often be filtered by a threshold in practice.
|
||||
|
||||
The complete structured input is then built as follows:
|
||||
|
||||
```text
|
||||
[System Prompt]
|
||||
You are a professional question-answering assistant. Please answer strictly according to the "reference information" provided by the user.
|
||||
If the reference information contains the answer, answer directly based on it.
|
||||
If the reference information does not contain the answer, explicitly tell the user that "the question cannot be answered based on the currently available materials," and do not fabricate information.
|
||||
Please indicate which information point your answer is based on.
|
||||
|
||||
[Retrieved Context]
|
||||
Apples are a fruit rich in vitamin C and dietary fiber, which helps digestion and immune-system health.
|
||||
Apple Inc. launched the first iPhone in 2007, fundamentally changing the smartphone industry.
|
||||
|
||||
[User Query]
|
||||
What are the benefits of eating apples?
|
||||
```
|
||||
|
||||
Its final response would then look like:
|
||||
|
||||
> According to the provided reference information, apples are rich in vitamin C and dietary fiber, and eating apples helps digestion and immune-system health. [Basis: Information 1]
|
||||
|
||||
### Query 3: "How is the weather today?"
|
||||
|
||||
At the query-vectorization stage, this question becomes a semantic vector related to weather and meteorology, for example `[0.10, -0.05, 0.30, -0.12, 0.21, 0.08, ...]`. In semantic space, this vector is far away from all document vectors about apples, whether the company or the fruit, so no significant similarity appears.
|
||||
|
||||
The system again performs Top-K retrieval with K = 2. Because the question topic is unrelated to the knowledge base, overall similarity scores are all very low:
|
||||
|
||||
- Similarity with passage B, fruit nutrition: 0.18, extremely low
|
||||
- Similarity with passage C, iPhone launch: 0.10, almost irrelevant
|
||||
- Similarity with passage A, company founding: 0.08, almost irrelevant
|
||||
|
||||
Top-K still returns the top-ranked K results, but in this case those results do not provide effective evidence. In practice, systems often apply a minimum similarity threshold; when every score falls below it, the retriever returns an empty recall set (no valid results) so that irrelevant passages do not interfere with the answer.
|
||||
|
||||
The two returned passages would still be:
|
||||
|
||||
1. Passage B, similarity 0.18: "Apples are a fruit rich in vitamin C and dietary fiber, which helps digestion and immune-system health."
|
||||
2. Passage C, similarity 0.10: "Apple Inc. launched the first iPhone in 2007, fundamentally changing the smartphone industry."
|
||||
|
||||
The full input would then be:
|
||||
|
||||
```text
|
||||
[System Prompt]
|
||||
You are a professional question-answering assistant. Please answer strictly according to the "reference information" provided by the user.
|
||||
If the reference information contains the answer, answer directly based on it.
|
||||
If the reference information does not contain the answer, explicitly tell the user that "the question cannot be answered based on the currently available materials," and do not fabricate information.
|
||||
Please indicate which information point your answer is based on.
|
||||
|
||||
[Retrieved Context]
|
||||
Apples are a fruit rich in vitamin C and dietary fiber, which helps digestion and immune-system health.
|
||||
Apple Inc. launched the first iPhone in 2007, fundamentally changing the smartphone industry.
|
||||
|
||||
[User Query]
|
||||
How is the weather today?
|
||||
```
|
||||
|
||||
The LLM would first judge whether the reference information contains direct weather or real-time meteorological information. After confirming that it does not, it would follow the instruction to answer that it cannot answer:
|
||||
|
||||
> The currently available materials cannot answer the question "How is the weather today?" because the reference information only contains content related to apples, fruit nutrition, and Apple Inc. products, and does not contain weather information or real-time meteorological data. [Basis: No weather-related information exists in the retrieved context]
|
||||
|
||||
From these three examples, we can see the key to the RAG dialogue stage. The system prompt defines the LLM's role and response rules, retrieved evidence provides concrete and trustworthy material, and the user's question defines the task objective. This structured-input pattern is exactly what lets RAG effectively guide and constrain an LLM that might otherwise hallucinate, turning it into a system that produces stable and reliable answers. It ensures that the model is used for understanding and organizing existing information rather than inventing unsupported information.
|
||||
|
||||
# 4. The Evolution of RAG
|
||||
|
||||
RAG did not originate in the era of large models. Earlier research already contained prototypes of the same idea. From a historical perspective, RAG arose from recognition of the limitations of traditional LLMs. Early large language models depended mainly on pretraining data, and that data became fixed once training finished. For example, models such as GPT-3 had knowledge cutoff dates tied to when the training data was collected and could not obtain later knowledge. Retraining or fine-tuning LLMs for specific domains also required large resources and specialized expertise, making it expensive and hard to iterate quickly.
|
||||
|
||||
The roots of RAG can be traced back to the DrQA framework in 2017, an early attempt to combine document retrieval with a neural reading-comprehension model. A major breakthrough then came in 2020 with Dense Passage Retrieval, or DPR, which used pretrained neural models for semantic retrieval instead of traditional word-frequency-based methods such as TF-IDF and BM25. That same year, RAG was formally proposed and systematized, and it has since become a standard way to address the knowledge-cutoff and hallucination problems in LLMs.
|
||||
|
||||
Broadly speaking, the evolution of RAG can be divided into three stages:
|
||||
|
||||

|
||||
|
||||
## 4.1 First-Generation RAG: Naive RAG
|
||||
|
||||
Naive RAG is the most basic form of RAG. From an engineering perspective, it follows a very direct three-step flow:
|
||||
|
||||
1. Document preprocessing and indexing. Raw documents are cleaned, split into fixed-length text chunks, encoded into vectors with an embedding model, and written into a vector database.
|
||||
2. Similarity-based retrieval. The user's natural-language question is encoded into a vector, and the system performs a Top-K similarity search over the vector store.
|
||||
3. Simple retrieval-augmented generation. The retrieved chunks are directly concatenated with the original question to form a long prompt, which is sent to the LLM for answer generation.
|
||||
|
||||
The value of this stage is that it verified, with a very low barrier, that "retrieve before answering" actually works. Compared with relying only on the model's internal memory, it already significantly reduces knowledge-cutoff issues and some hallucinations, which is why it played an important role in early prototypes, demos, and introductory tutorials.
|
||||
|
||||
However, the limitations of first-generation RAG are also obvious. First, the chunking strategy is usually crude. Most systems simply split by fixed length, which can cut a coherent semantic paragraph in the middle or mix multiple topics inside one chunk. This hurts retrieval accuracy and also makes comprehension harder for the LLM. Second, the retrieval signal is too simple. Ranking usually depends only on vector similarity and does not use richer structured clues such as keywords, timestamps, source credibility, or access permissions. Third, retrieval results are barely governed at all: noisy, repetitive, and even contradictory chunks can be stuffed into the context unchanged, causing large amounts of low-value information to occupy an already limited context window.
|
||||
|
||||
In short, the first generation solved the question of whether retrieval is needed. But on the questions of how to retrieve better, and how to use retrieved information more reasonably, it still remained at a rather primitive stage.
|
||||
|
||||
## 4.2 Second-Generation RAG: Advanced RAG
|
||||
|
||||
As RAG moved from demos into real business scenarios, the requirements for stability, controllability, and output quality rose sharply. The second generation, usually grouped under the broad name Advanced RAG, still follows the pattern of retrieve first and generate second, but it introduces systematic refinement both before and after retrieval. In other words, the system is no longer satisfied with merely retrieving something. It now aims to store the right things properly, ask the right questions clearly, and govern the retrieved context carefully.
|
||||
|
||||
Before retrieval, the focus is on storing and asking well:
|
||||
|
||||
- On the indexing side, chunking evolves from fixed-length splits to semantically aware chunking and hierarchical indexing. The system may chunk along chapter, subsection, paragraph, or sentence boundaries, combined with sliding windows and multi-granularity index structures.
|
||||
- Each document chunk can carry rich metadata such as source, timestamp, author, topic, and document type, providing more dimensions for later filtering and ranking.
|
||||
- On the query side, the user's original question can be rewritten, expanded, or decomposed through techniques such as Query Rewrite, Multi-Query, Sub-Query decomposition, and Step-back Prompting, transforming vague or conversational user queries into forms that retrieval can understand better.
|
||||
|
||||
> 1. Query Rewrite
|
||||
>
|
||||
> The core idea is to transform the user's vague, colloquial, or nonstandard query into a normalized expression that the retrieval system can understand more easily, supplementing key information and resolving ambiguity.
|
||||
>
|
||||
> - For example, "How do I check tomorrow's weather in Beijing?" might be rewritten into something more standardized such as "Query tomorrow's full-day real-time weather in Beijing."
|
||||
> - Or "Recommend good movies" may be rewritten, after looking at user history, into "Recommend high-rated 2024 suspense movies."
|
||||
>
|
||||
> 2. Multi-Query
|
||||
>
|
||||
> The system generates multiple semantically related but differently angled queries from the original question to reduce missed results and cover latent needs the user did not explicitly state.
|
||||
>
|
||||
> 3. Sub-Query
|
||||
>
|
||||
> For compound questions that contain several goals, the system splits them into smaller, simpler sub-queries so retrieval can match each need precisely.
|
||||
>
|
||||
> 4. Step-back Prompting
|
||||
>
|
||||
> The system first generates a more abstract, higher-level question, then uses that to guide retrieval direction, reducing bias caused by being too narrowly focused on details in the original question.
|
||||
|
||||
After retrieval, the focus is on governing what was retrieved:
|
||||
|
||||
- A dedicated rerank model or even an LLM can rerank candidate documents so the most important and question-relevant content enters the context first.
|
||||
> A rerank model is a key component in an information-retrieval pipeline. It performs second-stage ranking on candidate results returned by the recall phase, using stronger semantic understanding, often based on Transformer architectures, to fix semantic ranking errors from the first stage and move the results most aligned with user needs further forward.
|
||||
- Retrieved passages can be filtered, deduplicated, and compressed to remove clearly irrelevant or highly repetitive chunks, reducing the tendency of long-context systems to ignore useful information in the middle.
|
||||
- When necessary, light model fine-tuning can make the LLM more likely to answer from retrieval evidence and include explicit citations or sources.
|
||||
|
||||
Overall, Advanced RAG is no longer focused only on whether retrieval is necessary or whether something can be retrieved. It instead addresses three larger challenges: whether the truly critical passages can be located precisely, whether the context handed to the large model is concise, well-structured, and easy to use efficiently, and whether the whole system remains stable and reliable in the presence of noise, conflict, or multi-source information needs.
|
||||
|
||||
Large amounts of experimental and engineering evidence show that Advanced RAG significantly outperforms Naive RAG on answer accuracy, hallucination suppression, system robustness, and explainability. That is why it has gradually replaced traditional basic approaches and become the mainstream industrial paradigm for building RAG systems today.
|
||||
|
||||
## 4.3 Third-Generation RAG: Modular RAG
|
||||
|
||||
In complex enterprise applications, requirements often span multiple domains. In those cases, a simple linear flow of retrieve, rerank, and generate is often not enough:
|
||||
|
||||
1. The same system may need to support simple FAQs, long report generation, code retrieval, and database calls.
|
||||
2. It may need to connect vector stores, full-text retrieval, relational databases, knowledge graphs, and external search engines at the same time.
|
||||
3. It may need to preserve user preferences and historical decisions over multiple rounds, while also applying compliance checks and answer traceability.
|
||||
|
||||
Against this background, RAG began evolving toward a modular system shape. Modular RAG is no longer viewed as a fixed pipeline. It is treated instead as a set of pluggable, replaceable, and composable function modules that can be orchestrated as needed. Typical modules include:
|
||||
|
||||
1. Query understanding and routing
|
||||
This module handles intent recognition, question rewriting, subtask decomposition, and path selection. It decides whether a request should rely mainly on internal knowledge, external retrieval, or a specific tool or database.
|
||||
2. Multi-source retrieval and fusion
|
||||
This module connects vector databases, full-text search, structured databases, and knowledge graphs simultaneously, queries them, and merges and reranks their results into a unified evidence set.
|
||||
3. Memory and personalization
|
||||
This module maintains long-term user profiles, short-term session memory, and domain knowledge caches so the system can continuously accumulate and use historical information.
|
||||
4. Task adaptation and governance
|
||||
This module loads different adapters for different tasks, constrains output format, tone, and style, and governs outputs through fact checking, risk filtering, and citation alignment.
|
||||
|
||||
In short, traditional RAG often ends after one retrieval round plus one generation round. Modular RAG breaks that single-flow pattern. If the system discovers during generation that information is still insufficient, it can proactively trigger new retrieval rounds and even move back and forth multiple times between retrieval and generation to complete a more complex task.
|
||||
|
||||
Going further, the model can learn to make its own decisions: answer directly from internal knowledge or short context when confidence is high, and launch retrieval or external tool calls only when uncertainty is high. That improves efficiency and saves resources while preserving quality. For heavily underspecified or incomplete queries, the model can even generate a hypothetical intermediate answer or draft document first, then use that as a clue for further retrieval, progressively approaching reliable sources.
|
||||
|
||||
At this stage, RAG is no longer just a simple component that attaches a few reference passages to a large model. It is becoming the central knowledge-orchestration layer inside enterprise intelligent applications, coordinating multiple data sources, multiple tools, and multiple tasks.
|
||||
|
||||
# 5. From Demo to Enterprise-Grade RAG
|
||||
|
||||
From the perspective of enterprise engineering, building a RAG system cannot be limited to retrieval-augmented generation alone. The material above is still closer to a demo-level introduction. In real business scenarios, data is often noisy and inconsistent in format, so more effort must be invested into preprocessing, cleaning, and ingestion, and model selection must be handled carefully at every key point.
|
||||
|
||||
A complete enterprise-grade RAG system can usually be divided into three core modules: layout analysis and knowledge ingestion, knowledge-base construction, and RAG-based question-answering service. Across the full technical chain, several key model-selection decisions appear, including the embedding model, rerank model, and LLM. Only with sensible technical choices at each stage can the system achieve strong overall results.
|
||||
|
||||
1. Layout analysis and local knowledge-file reading
|
||||
|
||||
This module converts local knowledge assets in different formats into text usable for retrieval. Inputs may include PDFs, TXT, HTML, Word, Excel, and PPT files, as well as scanned image files such as PNG and JPG, or even audio recordings.
|
||||
|
||||
The system needs to parse each format appropriately, perform layout analysis and structural extraction for text documents, distinguish titles, main body, tables, headers, and footers, and restore a sensible reading order. It performs OCR on image files and ASR on speech, finally converting everything into relatively clean knowledge text while retaining basic metadata such as file name, chapter, page number, and timestamp for later chunking and indexing.
|
||||
|
||||
2. Knowledge-base construction: chunking, embeddings, and indexing
|
||||
|
||||
After obtaining cleaned knowledge text, the system performs chunking, splitting long documents into semantically coherent blocks of suitable length, usually by paragraph, title structure, or sliding window, while preserving each chunk's source and metadata.
|
||||
|
||||
Then it uses the chosen embedding model, such as `text-embedding-3-small`, Sentence Transformers, or BGE, to calculate vector representations for each chunk and build a vector index using tools such as Faiss, Milvus, or managed vector-search services. At that point, a knowledge base that supports fast semantic retrieval has been created.
|
||||
|
||||
3. RAG-based question answering: recall, reranking, concatenation, generation
|
||||
|
||||
In the online QA stage, the user sends a query. The system embeds it into a query vector, retrieves a batch of the most similar text chunks from the vector index, and treats that as a coarse ranking stage. Then it can use a rerank model such as a BGE reranker or even an LLM acting as a reranker to score query-document pairs again and keep only the Top-K documents that are truly most relevant as the knowledge context.
|
||||
|
||||
Next, together with a carefully designed system prompt such as "Please answer strictly based on the following materials," the system concatenates the user query and retrieved document passages and sends the merged prompt to the LLM. The model then generates the final answer from those retrieved pieces of evidence and, when needed, includes citations or sources.
|
||||
|
||||
## 5.1 Model Selection
|
||||
|
||||
Next we focus on model selection. A complete RAG system usually involves three core model categories: embedding models, rerank models, and large language models. Each has its own role, and together they form the full path from retrieval to answer generation. The embedding model converts text into searchable semantic vectors, the rerank model refines initial retrieval results, and the LLM generates the final answer based on the selected knowledge context.
|
||||
|
||||
### 5.1.1 Embedding Models
|
||||
|
||||
In a RAG system, the job of the embedding model is to convert text, such as user queries and knowledge-base content, into high-dimensional vectors. Semantically similar texts are placed closer together in vector space, allowing the system to locate related knowledge quickly by similarity. Choosing the right embedding model is therefore one of the most critical steps in building a high-performance RAG system because it directly determines recall quality.
|
||||
|
||||
To choose a strong model, it helps to use a systematic benchmark. One of the most widely used is MTEB, the Massive Text Embedding Benchmark.
|
||||
|
||||
MTEB provides a unified and objective evaluation framework for many embedding models. Through eight major task categories and 56 datasets, it evaluates performance across retrieval, clustering, classification, reranking, text matching, semantic similarity, and more. A model's overall MTEB score reflects the generality and robustness of its vector representations and can serve as an important reference for model selection. The latest ranking can be checked on the Hugging Face MTEB leaderboard:
|
||||
|
||||
[HuggingFace MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard)
|
||||
|
||||

|
||||
|
||||
Although there are many models on the leaderboard, you do not need to master all of them. In practice, choosing the embedding model bundled by a major model provider, or using a cloud-served model that many people have already validated, is usually a safe choice. You can also filter the leaderboard by category or language in the sidebar:
|
||||
|
||||

|
||||
|
||||
When filtering embedding models, two parameters matter especially because they directly affect RAG performance: dimension and context length.
|
||||
|
||||
Dimension is the dimensionality of the vector output, such as 128, 768, or 1536. It roughly reflects how many semantic features the vector can express. Higher-dimensional vectors can capture richer semantic detail and stronger discrimination. For example, a 768-dimensional vector can represent "apple" from hundreds of angles such as variety, taste, and origin, making it suitable for professional scenarios like healthcare or law that need precise retrieval. Lower dimensions reduce computation and storage cost and improve retrieval speed, making them suitable for large-scale general scenarios with high concurrency and strong real-time requirements.
|
||||
|
||||
Context length is the maximum text length the embedding model can process in one pass, measured in tokens. One English token is roughly three quarters of a word, and one Chinese token is roughly one Chinese character. Anything longer than the maximum is truncated. This directly determines whether the model can fully understand the text. If important information is lost because the length is too short, retrieval accuracy drops sharply. For short user queries and short QA pairs, 512 to 1024 tokens is often enough. For longer texts such as papers and reports, you usually need 2048 tokens or more.
|
||||
|
||||
Below is a comparison of several common embedding models. In practice, you need to choose by balancing cost and performance. There is no universally best model, only the most suitable model after comparing several options in your own use case.
|
||||
|
||||
| Model Name | Model Scale | Core Strength | Suitable Scenarios |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| OpenAI `text-embedding-3-large` | Closed API | Long-term leader on MTEB, mature and stable | Cloud API scenarios that prioritize extreme performance and have enough budget |
|
||||
| `jina-embeddings-v2` | Supports long text up to 8K context | Strong for long-document retrieval through asynchronous encoding design | Document analysis, legal compliance, academic retrieval |
|
||||
| `multilingual-e5-large` | Large scale | Classic multilingual option | Cross-lingual RAG, international products, multilingual support systems |
|
||||
| `Qwen/Qwen3-Embedding-8B` | 8B parameters, up to 4096 custom dimensions | Former top multilingual MTEB performer, strong on long text, multilingual tasks, and code | High-precision Chinese-English RAG, long-document analysis, code retrieval |
|
||||
| `Qwen/Qwen3-Embedding-4B` | 4B parameters | Strong balance of performance and efficiency | Large-scale production RAG systems |
|
||||
| `Qwen/Qwen3-Embedding-0.6B` | 0.6B parameters | Suitable for edge devices | Resource-constrained, speed-first scenarios |
|
||||
| `BAAI/bge-m3` | Supports hybrid retrieval, dense plus sparse plus multi-vector | Strong on multilingual benchmarks such as MIRACL | Complex multilingual scenarios that need hybrid retrieval |
|
||||
| `BAAI/bge-large-zh-v1.5` | Large scale | Stable Chinese RAG baseline with strong community validation | Pure Chinese projects with shorter documents |
|
||||
| ZhipuAI `Embedding-3` | Closed cloud API | Supports custom dimensions from 256 to 2048 | Chinese-focused applications preferring cloud APIs |
|
||||
|
||||
### 5.1.2 Rerank Models
|
||||
|
||||
In a RAG system, the rerank model is responsible for finely reranking initial retrieval results. It takes the user query and candidate documents as input and computes an exact relevance score for each query-document pair. The higher the score, the better the match. Therefore, adding a rerank model on top of embedding-based recall is a key step for improving retrieval precision.
|
||||
|
||||
For embedding models, we can use benchmarks like MTEB. For rerank models, one useful reference is Agentset's reranker leaderboard:
|
||||
|
||||
[Reranker Leaderboard](https://agentset.ai/rerankers)
|
||||
|
||||
The Agentset benchmark first retrieves the 50 most relevant candidate results from a large document store using FAISS, then asks the rerank model under evaluation to rerank those 50 documents. The benchmark pays attention to both ranking quality and latency. In practical applications, pursuing precision while ignoring speed hurts user experience, while pursuing speed while sacrificing ranking quality harms usefulness.
|
||||
|
||||
Agentset also introduces an Elo scoring mechanism. For each query, GPT-5 acts as a judge and compares the ranked outputs of two different rerank models, deciding which one places truly relevant documents in a more sensible order. After large numbers of such pairwise comparisons, models that win more often receive higher Elo scores, providing an intuitive overall performance signal.
|
||||
|
||||
The benchmark also uses two complementary groups of metrics:
|
||||
|
||||
- `nDCG@5/10`, which focuses on whether relevant documents are placed near the front and therefore reflects ranking precision
|
||||
- `Recall@5/10`, which focuses on whether all relevant documents can be found and therefore reflects coverage
|
||||
|
||||
Together these metrics provide a more complete picture of rerank performance.
|
||||
|
||||
Still, in practice, you do not need to select rerank models only from a leaderboard. Industrial usefulness and leaderboard score are not always the same thing. A practical approach is to start with the rerank models recommended by your cloud vendor or the default rerank APIs offered by major model providers, or to test the reranker from a model family you are already using, such as a matching Qwen rerank model.
|
||||
|
||||
### 5.1.3 LLMs
|
||||
|
||||
After semantic retrieval by the embedding model and refined filtering by the rerank model, the relevant document passages are combined with the user's original question into a prompt. The LLM then performs reading comprehension, information integration, and natural-language generation to output a coherent, accurate answer that fits the context.
|
||||
|
||||
At the implementation level, there are two main ways to use LLMs in RAG:
|
||||
|
||||
1. Privately deployed large models.
|
||||
These are suitable for scenarios that care about data privacy, controllable cost, or deep customization. Mainstream open models such as Qwen, Llama, and GLM perform well in RAG tasks. For example, Qwen2.5 in the 7B or 14B range offers good instruction-following and Chinese understanding while keeping resource use modest, making it suitable for local enterprise deployment. Models such as Kimi, MiniMax, and DeepSeek can also be considered according to specific business needs.
|
||||
2. Cloud API large models.
|
||||
These fit scenarios that prioritize fast launch, elastic scaling, and continuous model upgrades. Major providers such as OpenAI, Anthropic, Google, Alibaba, and ZhipuAI all offer stable API services. These models generally have strong language understanding and generation ability and can synthesize answers well in RAG scenarios.
|
||||
|
||||
When selecting cloud models, several points matter: whether answers are accurate and fluent, whether the price is reasonable, whether latency is acceptable, and whether the context window is large enough to hold multiple retrieved documents. In practice, you should compare several candidates on your own data and see which one gives the most complete and accurate answers. If cost is a concern, a useful approach is to combine large and small models: use cheaper small models for simple questions and reserve expensive large models for difficult cases. Since models update quickly, it is also wise to retest candidates periodically.
|
||||
|
||||
For broad conversation and QA ability, LMSYS Chatbot Arena, now LMArena, is one of the most widely recognized evaluation references:
|
||||
|
||||
[LMSYS Chatbot Arena (LMArena)](https://lmarena.ai/)
|
||||
|
||||
It uses blinded pairwise human comparisons to rank models. The ranking offers a useful first filter, but in actual RAG selection it should only be a starting point. In specialized domains such as medicine, law, and finance, general leaderboard ranking can diverge substantially from real performance on your business data.
|
||||
|
||||
Best practice for LLM selection is to build a small but representative test set containing 20 to 30 typical business questions and evaluate candidate models through the full end-to-end RAG pipeline rather than looking only at isolated model benchmarks. Questions such as whether to use reasoning models or non-reasoning models, or which model size best balances quality and speed, are all best answered through real testing on your own use case.
|
||||
|
||||
## 5.2 Execution Frameworks
|
||||
|
||||
In real engineering practice, you usually do not need to build an entire RAG system from zero. A number of mature open-source frameworks already exist, each with its own strengths in architecture, modular integration, and development efficiency. Enterprises can choose according to their own technical reserves and business scenarios.
|
||||
|
||||
Common framework types include:
|
||||
|
||||
**Low-code or visual platforms**
|
||||
|
||||
- [Dify](https://dify.ai): provides an intuitive visual interface for quickly building RAG applications, making it suitable for nontechnical teams or rapid prototype validation. It includes built-in multi-model access, workflow orchestration, and prompt management.
|
||||
- [Coze](https://www.coze.cn/): an AI bot development platform from ByteDance that offers zero-code visual construction. It integrates deeply with ByteDance model services, supports a plugin marketplace, scheduled tasks, and multichannel publishing, making it suitable for consumer-facing assistants or internal enterprise bots.
|
||||
- [n8n](https://n8n.io/): an open-source node-based workflow automation platform. In RAG scenarios, it can orchestrate complex business logic and connect preprocessing, vector database operations, model calls, and follow-up actions such as email sending or ticket updates into one automated flow.
|
||||
- [RAGFlow](https://ragflow.io/): focuses on deep layout analysis and knowledge extraction and performs well on complex documents such as multi-column PDFs and table-heavy materials.
|
||||
- [FastGPT](https://fastgpt.io/en): a Chinese open-source solution integrating knowledge-base management, dialogue orchestration, and application publishing, with strong Chinese documentation and suitability for fast deployment of Chinese RAG applications.
|
||||
|
||||
**Code frameworks and development libraries**
|
||||
|
||||
The tools below usually have implementations in different backend languages. You can choose the corresponding language version for your application stack.
|
||||
|
||||
- [LlamaIndex](https://www.llamaindex.ai/): a framework designed specifically for RAG, available for Python and TypeScript, with rich connectors, index structures, and query engines. Its modularity makes it suitable for deeply customized retrieval strategies or integration with many data sources.
|
||||
- [LangChain](https://www.langchain.com/): a general LLM application framework where RAG is only one use case. Its strength is its rich ecosystem and component coverage, including support for complex agents and workflow orchestration, though its learning curve is steeper.
|
||||
|
||||
If the team's technical reserves are limited and speed matters most, low-code platforms such as Dify, Coze, or FastGPT are good first choices. If you need deep customization, special data-source integration, or detailed performance tuning, LlamaIndex and LangChain offer more flexibility. In practice, a hybrid route is also common: use a low-code platform for rapid feasibility validation, then move to code frameworks for production deployment and optimization. Most of these frameworks also support rapid integration with mainstream embedding, rerank, and LLM models, letting you combine them flexibly using the model-selection principles discussed above.
|
||||
|
||||
## 5.3 Effect Evaluation
|
||||
|
||||
For enterprises deploying RAG systems, the biggest challenge is often not building the system but tuning it. Production-grade RAG contains two nondeterministic stages, retrieval and generation, so traditional software testing is not enough. That is why building a scientific evaluation system, or RAG evaluation, is so important.
|
||||
|
||||
### 5.3.1 Beginner Example: LLM-Based RAG Evaluation
|
||||
|
||||
To help build an intuitive understanding of RAG evaluation, we can look at a simple automated pipeline based on the idea of LLM-as-a-judge:
|
||||
|
||||
[Hugging Face Cookbook: RAG Evaluation](https://huggingface.co/learn/cookbook/rag_evaluation)
|
||||
|
||||
The process usually contains three key steps:
|
||||
|
||||
- First, synthesize an evaluation dataset by sampling documents from the knowledge base and asking an LLM to generate high-quality question-answer pairs, then filter them by relevance and groundedness to form a benchmark set.
|
||||
- Second, run the RAG system on each question in that test set and collect the generated answers.
|
||||
- Third, automate scoring by calling another LLM as a judge, comparing the generated answers with reference answers, and giving quantitative scores for dimensions such as accuracy and completeness.
|
||||
|
||||
A simple example:
|
||||
|
||||
1. Question generation. Suppose the knowledge base contains a product manual line saying, "This device supports wireless charging and has a 5000mAh battery." We ask one model to act as an exam setter and generate a question such as, "What is the battery capacity of this device?" The standard answer is "5000mAh."
|
||||
2. Answering. We send that question to the RAG system, which retrieves related material and answers, for example, "The device has a 5000mAh battery."
|
||||
3. Grading. We ask another model to act as the grader by comparing the question, the generated answer, and the reference answer, using a prompt such as, "Judge whether the generated answer is correct. Output only correct or incorrect."
|
||||
|
||||
By running this process at scale, we can compute metrics such as accuracy. This forms a practical loop of evaluate, optimize, and reevaluate.
|
||||
|
||||
If you want deeper detail on RAG evaluation, including metric definitions, framework usage, and benchmark datasets, two useful survey papers are:
|
||||
|
||||
- [https://arxiv.org/pdf/2504.14891](https://arxiv.org/pdf/2504.14891), *Retrieval Augmented Generation Evaluation in the Era of Large Language Models: A Comprehensive Survey*
|
||||
- [https://arxiv.org/pdf/2405.07437](https://arxiv.org/pdf/2405.07437), *Evaluation of Retrieval-Augmented Generation: A Survey*
|
||||
|
||||
### 5.3.2 Evaluation Metrics
|
||||
|
||||
RAG evaluation fundamentally revolves around two questions: can the retrieval module find the right material, and can the generation module produce a high-quality answer from that material? Accordingly, the evaluation system is divided into retrieval evaluation and generation evaluation, supplemented by LLM-as-a-judge scoring.
|
||||
|
||||
#### Retrieval Evaluation: recall accuracy and ranking quality
|
||||
|
||||
The retrieval module is the first gate in a RAG system. Its evaluation focuses on three dimensions: whether it finds the right things, whether it finds enough of them, and whether it ranks them well.
|
||||
|
||||
**Basic recall quality metrics**
|
||||
|
||||
The classic basic metrics are Recall@K, Precision@K, and F1:
|
||||
|
||||
- **Recall@K** measures the proportion of relevant documents recovered in the top K results. If five relevant documents exist and three are found in the top 10, Recall@10 is 60 percent. This tells us how broad retrieval coverage is.
|
||||
- **Precision@K** measures the proportion of top K results that are truly relevant. If three of the top 10 are relevant and seven are not, Precision@10 is 30 percent. This reflects retrieval accuracy.
|
||||
- **F1** is the harmonic mean of Recall and Precision and balances the two.
|
||||
|
||||
These metrics are useful for quickly diagnosing baseline recall problems. If Recall is low, relevant documents were not found at all. If Precision is low, retrieval noise is too high.
|
||||
|
||||
**Ranking quality metrics**
|
||||
|
||||
Finding relevant documents is only the first step. It is even more important to put the most relevant ones near the front. For that we look at MRR, NDCG@K, and MAP:
|
||||
|
||||
- **MRR, Mean Reciprocal Rank**, takes the reciprocal of the rank position of the first relevant document for each query and averages it across all queries. If the first relevant document appears in position 3, the reciprocal rank for that query is 1/3. MRR is especially suitable for scenarios where one correct answer is enough.
|
||||
- **NDCG@K, Normalized Discounted Cumulative Gain**, considers both graded relevance and position discount. It not only asks whether a document is relevant, but how relevant it is, and it rewards highly relevant documents that appear early.
|
||||
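- **MAP, Mean Average Precision**, averages the precision measured at the rank of each relevant document and then averages that value across queries, so it is sensitive to the positions of all relevant documents and reflects overall ranking quality.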
|
||||
|
||||
In actual engineering, a common combination is Recall@K plus MRR@K. For example, if Recall@10 is 80 percent but MRR@10 is only 0.3, relevant documents are being found but buried too deep, which suggests reranking needs improvement.
|
||||
|
||||
When needed, a Coverage metric can also be added to monitor knowledge-base coverage and reveal systematic blind spots.
|
||||
|
||||
#### Generation quality evaluation: accuracy and factual faithfulness
|
||||
|
||||
Retrieval provides the raw material. The next question is whether the generation module can produce a high-quality answer from those materials. The core dimensions here are answer accuracy and faithfulness to the retrieved evidence.
|
||||
|
||||
**Exact match and text similarity**
|
||||
|
||||
The simplest metric is **EM, Exact Match**, which requires the generated answer to match the reference answer exactly. This is suitable for fixed-form, uniquely correct fact questions such as dates or headquarters locations, but it is too strict because different but equally correct surface forms may fail to match.
|
||||
|
||||
That is why n-gram-overlap metrics such as **ROUGE**, **BLEU**, and **METEOR** are also commonly used. They score generated answers by comparing word overlap with reference answers. ROUGE-L pays attention to longest common subsequences, BLEU comes from machine translation and emphasizes n-gram precision, and METEOR adds synonym and stemming considerations.
|
||||
|
||||
To overcome the limits of pure word overlap, we can also use **BERTScore** or direct vector similarity. These use pretrained semantic representations and therefore tolerate surface variation better.
|
||||
|
||||
**Factual faithfulness and hallucination detection**
|
||||
|
||||
For RAG systems, answer-reference similarity is not enough. The more important question is whether the answer is actually grounded in the retrieved documents or whether it hallucinates unsupported content.
|
||||
|
||||
That is why metrics such as **Hallucination rate** and **Faithfulness** are important. A second LLM can act as a fact checker and inspect the generated answer sentence by sentence, judging whether each claim can be supported by the retrieved documents. For high-stakes domains such as healthcare, law, and finance, this type of metric is especially important, and some enterprises even enforce hallucination thresholds as production release criteria.
|
||||
|
||||
#### LLM-as-a-Judge: multi-dimensional scoring
|
||||
|
||||
Every automatic metric has limits. Most surface-form metrics cannot fully capture semantic quality or overall usefulness. That is where LLM-as-a-judge becomes especially valuable.
|
||||
|
||||
The basic approach is to feed the question, retrieved documents, system answer, and reference answer into a strong independent model, such as GPT-4 or Claude, and ask it to score across dimensions such as:
|
||||
|
||||
- question relevance
|
||||
- information completeness
|
||||
- factual faithfulness
|
||||
- overall correctness
|
||||
|
||||
The strength of an LLM judge is that it can make a more human-like holistic judgment. Of course, judge prompts still need careful design and calibration against human-labeled examples to keep the scoring consistent and reliable.
|
||||
|
||||
#### Building a practical metric combination
|
||||
|
||||
With so many metrics available, teams often wonder which ones to use. A practical recommendation is to start with a compact combination and expand gradually:
|
||||
|
||||
- For retrieval, begin with Recall@K plus MRR@K
|
||||
- For generation, choose one or two baseline metrics from EM, ROUGE-L, and BERTScore according to task type
|
||||
- For overall evaluation, introduce an LLM judge focused on relevance, completeness, and faithfulness
|
||||
|
||||
Then iterate through a loop of evaluation, problem diagnosis, strategy adjustment, and reevaluation.
|
||||
|
||||
### 5.3.3 Evaluation Frameworks
|
||||
|
||||
As RAG has developed rapidly, both academia and industry have produced many strong evaluation frameworks. These frameworks not only package common metrics, but also offer standardized datasets, benchmark procedures, and end-to-end workflows.
|
||||
|
||||
#### A basic classification of frameworks
|
||||
|
||||
We can roughly divide RAG evaluation frameworks into three categories:
|
||||
|
||||
- **Research frameworks**, which focus on academic exploration and fine-grained diagnosis. Examples include FiD-Light and Diversity Reranker.
|
||||
- **Benchmark frameworks**, which provide standardized test sets and workflows for comparing systems horizontally. These include frameworks such as RAGAS, ARES, RGB, MultiHop-RAG, and CRUD-RAG.
|
||||
- **Tooling frameworks**, which emphasize engineering usability and integration with development frameworks. Examples include TruEra RAG Triad, LangChain Benchmarks, and RECALL.
|
||||
|
||||
In recent years, evaluation frameworks have become more specialized. For example, medicine has MedRAG, law has LegalBench-RAG, and finance has its own domain-specific frameworks. These domain frameworks often provide not only specialized datasets but also specialized metrics such as medical accuracy or legal citation relevance.
|
||||
|
||||
In practice, a good rule of thumb is:
|
||||
|
||||
- If you need a baseline quickly, start with a more general framework such as RAGAS.
|
||||
- If you are diagnosing a specific problem, choose a more targeted framework.
|
||||
- If you are in medicine, law, finance, or another professional domain, prefer domain-adapted frameworks where possible.
|
||||
- Prefer actively maintained tools with strong documentation and responsive communities.
|
||||
|
||||
Commonly recommended tools in the community include Ragas, Continuous Eval, TruLens-Eval, the evaluation features inside LlamaIndex, Phoenix, DeepEval, LangSmith, and OpenAI Evals.
|
||||
|
||||
### 5.3.4 Evaluation Benchmarks
|
||||
|
||||
The importance of evaluation benchmarks is often underestimated. Many teams start assessing a RAG system with only a handful of hand-written test questions, then discover that real online performance differs sharply from offline impressions. The root cause is that they lack representative and systematic evaluation data.
|
||||
|
||||
A benchmark that supports system iteration well usually has three core characteristics:
|
||||
|
||||
- representativeness, meaning it covers high-frequency user questions, boundary cases, and abnormal inputs
|
||||
- standardization, meaning question and answer formats, difficulty levels, and scoring rules are consistent
|
||||
- evolvability, meaning the benchmark can be updated as system capability and business needs evolve
|
||||
|
||||
For most enterprises, because business scenarios are unique, the final answer is usually to build their own evaluation datasets.
|
||||
|
||||
- Start by extracting real user questions from business logs and sampling them by type, frequency, and difficulty.
|
||||
- For simple cases, let domain experts annotate directly. For more complex questions, let a strong LLM generate candidate answers first, then have experts revise them.
|
||||
- Besides the answer itself, label metadata such as related documents, answer type, and difficulty level.
|
||||
- Update the dataset periodically with new hard cases discovered online.
|
||||
|
||||
If resources are limited and you need a fast baseline, public benchmarks are still a useful starting point. As of 2025, many public benchmarks exist for both general and vertical scenarios:
|
||||
|
||||

|
||||
|
||||
When choosing among them, first clarify the goal. Are you establishing a baseline, or validating the system before launch? Then check whether the benchmark covers the scenarios and difficulty profile you care about. For time-sensitive domains such as news or finance, make sure the benchmark includes time-sensitive tests.
|
||||
|
||||
In practice, combining your own in-domain dataset with public benchmarks is often the most robust path because it keeps evaluation close to real business needs while also preserving some horizontal comparability.
|
||||
|
||||
# 6. Deep Dive: Learning from Competitions and Open Tutorials (Optional)
|
||||
|
||||
The principles and baseline implementation above are enough to help you build a usable prototype, but they are still some distance away from solving the harder problems that appear in production. If you want to understand more practical and battle-tested RAG techniques, one of the most efficient ways is to study winning competition solutions and strong open tutorials. These solutions often concentrate the best practices discovered by strong teams after repeated attempts in real scenarios.
|
||||
|
||||
The examples below are representative rather than exhaustive. When you meet a specific problem in practice, such as PDF parsing, multimodal retrieval, or low-latency optimization, it is often effective to search for competitions related to that problem and study the technical reports and open code from winning teams.
|
||||
|
||||
## 6.1 Semantic Cache: optimizing high-frequency queries
|
||||
|
||||
Hugging Face provides a semantic-cache implementation built on top of the Chroma vector database:
|
||||
|
||||
[https://huggingface.co/learn/cookbook/semantic_cache_chroma_vector_database](https://huggingface.co/learn/cookbook/semantic_cache_chroma_vector_database)
|
||||
|
||||

|
||||
|
||||
Background: Most tutorial RAG systems are built for single-user testing. But once deployed to production, the system may receive dozens or thousands of repeated queries, for example support users repeatedly asking how refunds work. If every repeated query still triggers vector retrieval and an LLM call, latency and cost rise quickly. A semantic cache layer can sharply reduce pressure on the original data sources while preserving answer quality.
|
||||
|
||||
This design uses a two-layer retrieval architecture. The base layer stores the original knowledge base in Chroma, using a dataset such as MedQuad as an example and assigning each entry a unique ID for precise reference. The cache layer is built on FAISS using a FlatL2 index. The semantic cache sits between the user query and Chroma, rather than caching the LLM's final answer directly. That design matters because directly caching answers can break personalized answer requirements such as "explain this in simple language."
|
||||
|
||||
The cache system uses the `all-mpnet-base-v2` SentenceTransformer to generate query vectors and uses Euclidean distance, with a threshold of 0.35, to judge whether queries are similar. When the cache is full, controlled by the `max_response` parameter, the oldest entry is removed using FIFO. Cache data can also be saved into JSON files for cross-session reuse.
|
||||
|
||||
In small-scale testing, a first query such as "How do vaccines work?" took 0.057 seconds when fetched from Chroma, while a similar query served from cache took only 0.016 seconds. In large production scenarios with many repeated queries, this approach can improve performance by 90 to 95 percent and significantly reduce vector-store and API costs.
|
||||
|
||||
## 6.2 Unstructured Data Processing: unified parsing for multi-format documents
|
||||
|
||||
Another Hugging Face tutorial shows how to use the Unstructured library to build a full pipeline for unstructured document processing:
|
||||
|
||||
[https://huggingface.co/learn/cookbook/rag_with_unstructured_data](https://huggingface.co/learn/cookbook/rag_with_unstructured_data)
|
||||
|
||||

|
||||
|
||||
Background: In enterprise scenarios, knowledge is often scattered across PDFs, PowerPoint decks, EPUBs, HTML pages, and many other formats. Traditional preprocessing methods either support only one format or lose crucial structural information such as tables and title hierarchy during conversion. That makes it difficult for the RAG system to understand and retrieve the content correctly.
|
||||
|
||||
This solution first downloads multi-format test documents, such as a Canadian pesticide handbook PDF containing many tables and a University of Florida citrus IPM PowerPoint file containing charts and multi-level headings. It then uses Unstructured's Local Runner for parsing. The configuration includes a processor config, a partition config that can optionally use API partition mode for stronger OCR, and a local config defining input paths. Parsed documents are converted into JSON containing typed elements such as body text, titles, and tables.
|
||||
|
||||
The system then uses `chunk_by_title`, sets a max length of 512 characters, and merges consecutive fragments shorter than 200 characters to preserve semantic coherence. During conversion into LangChain Document objects, complex metadata fields are filtered to fit Chroma. The vector stage uses the `BAAI/bge-base-en-v1.5` embedding model, together with a 4-bit quantized `Llama-3-8B-Instruct` and a LangChain RetrievalQA chain to build a complete RAG system.
|
||||
|
||||
The resulting system can handle multi-format documents accurately. For questions such as "Are aphids a pest?" it can extract key facts from the parsed documents and generate answers grounded in the relevant material. This is especially useful for enterprise knowledge bases that need to process many document types.
|
||||
|
||||
## 6.3 Enterprise document QA: high-precision and traceable RAG
|
||||
|
||||
The championship solution of the Enterprise RAG Challenge shows how to build a production-grade RAG system under strict time and precision requirements:
|
||||
|
||||
- [https://abdullin.com/ilya/how-to-build-best-rag/](https://abdullin.com/ilya/how-to-build-best-rag/)
|
||||
- [https://hustyichi.github.io/2025/07/03/rag-complete/](https://hustyichi.github.io/2025/07/03/rag-complete/)
|
||||
|
||||
Background: Contestants had to parse 100 real enterprise annual-report PDFs in 2.5 hours, each report with up to 1000 pages and containing complex financial tables, multi-column layouts, and charts. After parsing, the system had to answer 100 precise business questions with explicit answer types, such as yes-no, company names, exact numerical indicators, or executive titles, and it had to cite page numbers as evidence.
|
||||
|
||||
The winning team chose IBM's open-source Docling as the PDF parser because it performed best on complex tables and multi-column text. They improved the Docling code so it could output JSON and Markdown-plus-HTML with metadata and especially improved table parsing. To accelerate processing, they rented RTX 4090 GPUs and finished the 100-report parse in 40 minutes.
|
||||
|
||||
Text chunking used 300-token chunks with 50-token overlap and recursive splitting to preserve semantic coherence. To avoid cross-company contamination, each company had its own FAISS vector store using an `IndexFlatIP` index. Retrieval then followed three stages: retrieve Top-30 chunks by vectors, deduplicate by parent pages because multiple chunks may come from the same page, and rerank pages with GPT-4o-mini. The final ranking combined the vector-retrieval score and the LLM reranking score as a weighted sum, with weights of 0.3 and 0.7 respectively.
|
||||
|
||||
Generation used different prompt templates for different answer types. For numeric questions, such as annual revenue, the system used a five-step analysis process to ensure indicator matching, unit consistency, and cross-checking. Outputs were structured to include analysis process and page references for traceability.
|
||||
|
||||
The system won two awards and took first place on the leaderboard. An important observation was that even smaller models such as Llama 8B outperformed more than 80 percent of participants, while Llama 3.3 70B came close to GPT-4o-mini, showing that a good system design can successfully balance accuracy, efficiency, and cost.
|
||||
|
||||
## 6.4 AIOps scenario: intelligent handling of mixed text-and-image data
|
||||
|
||||
The EasyRAG project in an AIOps RAG competition focused on QA for operations scenarios:
|
||||
|
||||
[http://blog.csdn.net/hustyichi/article/details/143323746](http://blog.csdn.net/hustyichi/article/details/143323746)
|
||||
|
||||

|
||||
|
||||
Background: Operations engineers often need to read technical documents that include not only text but also monitoring charts, system architecture diagrams, and performance curves. For example, when diagnosing a system problem, the answer to "What should I do when CPU utilization exceeds 80 percent?" may be scattered between text descriptions and monitoring graphs. Traditional text-only RAG cannot understand chart trends and values, so answers remain incomplete.
|
||||
|
||||
The indexing stage used an improved SentenceSplitter with 1024-token chunks and 200-token overlap. A key innovation was adding metadata such as knowledge-base paths and file paths to each chunk, which improved recall by 2 percent. For image data, the system first used PaddleOCR to extract text from charts and screenshots, then used a multimodal model, GLM-4V-9B, to generate natural-language descriptions of the image, for example describing a CPU usage line peaking at 90 percent in the afternoon. Both the OCR text and image description were then indexed together.
|
||||
|
||||
Retrieval used a two-path BM25 plus vector strategy for broad recall. BM25 covered chunk retrieval and path retrieval, helping filter irrelevant documents by file path, while vector retrieval used `gte-Qwen2-7B-instruct`. Reranking used `bge-reranker-v2-minicpm-layerwise`, and a 28-layer setting performed best in experiments.
|
||||
|
||||
Answer generation used a two-step strategy: first generate a draft from the Top-6 documents to maximize information coverage, then optimize the answer with the Top-1 most relevant document to emphasize the core answer.
|
||||
|
||||
To handle long-text scenarios, such as a complete operations manual with hundreds of pages, the system also implemented BM25-based context compression, splitting documents into sentences, scoring sentence similarity to the query, and concatenating only the most relevant sentences. At 50 percent compression, this method achieved 86.48 percent accuracy in only 7.7 seconds and outperformed tools such as LLMLingua.
|
||||
|
||||
## 6.5 Multi-source data fusion: collaboration between structured and unstructured knowledge
|
||||
|
||||
The winning solution in the KDD Cup 2024 Meta RAG challenge showed how to integrate unstructured web content and structured knowledge graphs:
|
||||
|
||||
- [https://blog.csdn.net/m0_59164520/article/details/143694213](https://blog.csdn.net/m0_59164520/article/details/143694213)
|
||||
- https://arxiv.org/pdf/2410.00005
|
||||
|
||||

|
||||
|
||||
Background: Task 1 required retrieval summarization from five web pages. Task 2 added a mock API representing a structured knowledge graph, enabling direct access to things like movie databases and entity relationships. Task 3 raised the difficulty by using fifty web pages plus the mock API to answer more complex queries, such as identifying Nolan-directed films with box office greater than 500 million dollars. Every query had to finish within 30 seconds.
|
||||
|
||||
For Task 1, the winning team built a refined web-processing pipeline. They used BeautifulSoup to extract page text and ParentDocumentRetriever to manage parent-child chunk relationships, using 200-token child chunks for retrieval and 500 to 2000-token parent chunks for generation. The embedding model was `bge-base-en-v1.5`, the vector store was Chroma, and reranking used `bge-reranker-v2-m3`. The team also supplemented movie and finance data from public datasets and fine-tuned `Llama-3-8B-instruct` with LoRA on training data that included invalid questions and reference answers.
|
||||
|
||||
For Tasks 2 and 3, the key innovation was prioritizing the knowledge graph. The system defined standardized API calls such as `get_person` and `get_movie`, with filtering and sorting support. It first called the knowledge graph API and only fell back to web retrieval if the graph results were missing or invalid. This improved both speed and answer accuracy.
|
||||
|
||||
Because the system prioritized the knowledge graph and used structured output formats, hallucination was clearly reduced. If the graph could provide a deterministic answer directly, the system returned it without a generative step. If web retrieval was required, the answer had to follow strict citation and stepwise reasoning rules.
|
||||
|
||||
The solution won first place in all three tasks. The main lesson is that in enterprise scenarios containing both structured and unstructured data, retrieval strategy should be designed according to data type: use deterministic structured data first and treat unstructured sources as supplements.
|
||||
|
||||
Across these practical cases, several shared principles appear repeatedly:
|
||||
|
||||
- choose caching, retrieval, and generation strategies according to the business scenario
|
||||
- design dedicated parsing and indexing paths for different formats and modalities
|
||||
- treat hybrid retrieval plus reranking as a standard configuration
|
||||
- use task-specific prompting and structured outputs to improve accuracy and traceability
|
||||
|
||||
These lessons from real competitions and open projects are valuable references when building stronger enterprise RAG systems.
|
||||
|
||||
# 7. Broad Exploration: The Future Evolution of RAG (Optional)
|
||||
|
||||
Once you have learned the practical skills and optimization methods of RAG, you can already improve system performance in concrete scenarios. But understanding only local engineering tricks is not enough if you want a wider grasp of where RAG is heading. We also need to look at broader evolutionary directions.
|
||||
|
||||
RAG is now rapidly breaking beyond the traditional retrieve-text-chunks-then-generate pattern. In this section we focus on several of those paths: moving from chunk retrieval to graph-structured retrieval, combining images and audio into multimodal RAG, improving long-document handling through vectorized late chunking, and the way RAG is gradually evolving into an agent-oriented system.
|
||||
|
||||
## 7.1 Graph RAG: reshaping deep retrieval with relationship networks
|
||||
|
||||
Related research:
|
||||
|
||||
- [https://arxiv.org/pdf/2410.05779](https://arxiv.org/pdf/2410.05779)
|
||||
- [https://arxiv.org/pdf/2502.11371](https://arxiv.org/pdf/2502.11371)
|
||||
- https://arxiv.org/pdf/2404.16130
|
||||
|
||||

|
||||
|
||||
Traditional RAG works by finding text passages similar to the question, which is like picking out the few paragraphs that look most relevant from a pile of material. That works well for direct fact lookup. But if a question requires connecting multiple documents and combining different clues, performance drops.
|
||||
|
||||
For example, a doctor might ask, "Based on these cases and the latest treatment guidelines, how should we evaluate the benefits and risks of a certain drug for elderly patients?" Or a project team might ask, "Looking across the past two years of requirements documents, review records, and online issue reports, which part of our system architecture fails most often?" Questions like these are not about finding a single sentence. They require identifying the people, objects, events, and relationships scattered across multiple materials and forming a complete picture.
|
||||
|
||||
Graph RAG builds that picture proactively. The system uses a large model to identify key entities from text, such as people, organizations, functional modules, events, and data, together with their relationships, such as causality, dependence, change, and contradiction. It then builds a knowledge network that grows as more material is added. Through automatic grouping, closely related entities and relationships are organized into themes, and each theme can be summarized in advance. When a user asks a question, the system no longer searches only for text passages that look similar. It first finds the most relevant entities and local graph structure, expands through related topic groups, and then gives the analysis path, node descriptions, and source passages together to the LLM for reasoning.
|
||||
|
||||
Under this framework, Graph RAG and traditional RAG complement one another. Traditional RAG remains strong for detail questions whose answers can be found in one step. Graph RAG is closer to how a human researcher thinks: first organize the overall structure and themes, then fill in evidence, and finally produce a conclusion with logic and conditions. Existing comparisons show that in multi-hop reasoning tasks, Graph RAG often covers more critical content and provides a broader perspective. Flexible combination of the two approaches is often better than using only one.
|
||||
|
||||
## 7.2 Multimodal RAG
|
||||
|
||||
Related research:
|
||||
|
||||
- https://arxiv.org/pdf/2502.08826
|
||||
|
||||

|
||||
|
||||
Real-world data is never only text. Engineers diagnosing server failures need to look at temperature curves, device screenshots, and logs together. Doctors making diagnoses need CT or MRI images, test reports, and electronic medical records at the same time. Traditional text RAG can at best retrieve phrases such as "temperature anomaly" or "suspected lung nodule," but it struggles to connect those descriptions to the actual chart trend or image lesion shape, and it cannot reverse-search documents or knowledge from images, audio, or video.
|
||||
|
||||
Multimodal RAG solves this problem of different modalities being unable to "see" one another. Its core is cross-modal semantic alignment. The system uses suitable encoders for images, video, audio, and text, together with OCR, ASR, and layout analysis, extracts key information from visual and audio sources, and maps different modalities into a shared semantic space where a unified multimodal index can be built.
|
||||
|
||||
At retrieval and generation time, whether the user asks for a chart showing a sales peak in Q3 2023 or uploads a sketch or operating video, the system first finds the closest multimodal evidence in that unified space, filters it by signals such as text similarity and image similarity, keeps the most useful pieces, and then gives those images, text passages, and tables together to a multimodal LLM. The model can then answer by combining evidence across modalities and ideally indicate the source or highlight relevant areas in the image or document.
|
||||
|
||||
Compared with text-only RAG, multimodal RAG can use more kinds of evidence and often reduces hallucination while producing more complete and more verifiable answers.
|
||||
|
||||
## 7.3 Late Chunking: preserving full context for long documents
|
||||
|
||||
Related introduction:
|
||||
|
||||
- https://jina.ai/news/late-chunking-in-long-context-embedding-models/
|
||||
|
||||

|
||||
|
||||
Imagine reading a Wikipedia article about Berlin. Traditional RAG would first cut it into independent paragraphs and then embed each chunk. If the first sentence says "Berlin is the capital of Germany," later phrases such as "the city" or "its population" lose their connection to Berlin once separated. A query such as "What is the population of Berlin?" may then fail because the term Berlin and the population information never appeared inside the same chunk. This problem becomes even worse for long documents. In a 200-page insurance contract, the definition of a deductible may appear on page 5 while the conditions under which it applies appear on page 30. Fixed-length chunking can split these related pieces into dozens of isolated chunks, and experiments show that semantic similarity can collapse sharply when that happens.
|
||||
|
||||
Late Chunking overturns the traditional chunk-first-then-embed pipeline and instead follows embed-first-then-chunk. With long-context embedding models that can handle something like 8192 tokens, the whole document is first passed through the Transformer, producing token-level embeddings that have already seen the full document. Only afterward are those globally informed token embeddings pooled into chunk embeddings according to chunk boundaries. The resulting chunks are no longer independent islands. They are context-dependent embeddings that preserve cross-paragraph references and conceptual relationships.
|
||||
|
||||
On BEIR benchmark datasets, Late Chunking outperforms traditional chunking broadly, with especially strong gains on longer documents. In short-text scenarios, the difference largely disappears, which confirms a key rule: the longer the document, the bigger the advantage of Late Chunking. The method is now integrated into Jina Embeddings v3. Although encoding a whole long document first can increase inference time by 10 to 20 percent, the retrieval gains in scenarios such as medical records, legal documents, and technical manuals can easily justify that cost.
|
||||
|
||||
Late Chunking shows that 8K-plus long-context embedding models are not overengineering in these scenarios. They are often necessary for producing high-quality chunk embeddings and represent a shift from chunk first, then embed, to embed first, then chunk.
|
||||
|
||||
## 7.4 From RAG to RAG in the Agent Era
|
||||
|
||||
Related discussions:
|
||||
|
||||
- [https://ragflow.io/blog/rag-at-the-crossroads-mid-2025-reflections-on-ai-evolution](https://ragflow.io/blog/rag-at-the-crossroads-mid-2025-reflections-on-ai-evolution)
|
||||
- [https://arxiv.org/pdf/2501.09136](https://arxiv.org/pdf/2501.09136)
|
||||
- [https://www.letta.com/blog/rag-vs-agent-memory](https://www.letta.com/blog/rag-vs-agent-memory)
|
||||
- [https://www.linkedin.com/posts/richmondalake_100daysofagentmemory-rag-memorizz-activity-7348281860843577346-LM7Y/](https://www.linkedin.com/posts/richmondalake_100daysofagentmemory-rag-memorizz-activity-7348281860843577346-LM7Y/)
|
||||
- https://www.llamaindex.ai/blog/rag-is-dead-long-live-agentic-retrieval
|
||||
|
||||
RAG has developed from a retrieval-augmented generation tool into a key part of an agent's cognitive architecture. Traditional RAG is built on a simple ask, retrieve, answer pattern and is fundamentally passive. It waits for a query and does not act proactively. To break through that passivity and handle more complex cognitive tasks, RAG has been deeply combined with agent capabilities, giving rise to a new paradigm: Agentic RAG.
|
||||
|
||||
Under this paradigm, the role of RAG changes fundamentally. It is no longer only a passive provider of external knowledge. Instead, it becomes the core processing unit that supports intelligent behavior under the agent's active planning, goal direction, and self-reflection. This fusion gives the overall system goal orientation, iterative optimization, and autonomous decision-making, greatly deepening the quality of human-AI interaction. Agentic RAG can understand complex tasks, decompose them, plan retrieval strategies, and evaluate the quality of initial results to decide whether deeper exploration is needed.
|
||||
|
||||

|
||||
|
||||
The key to this capability is a multi-layered active loop. Faced with a complex query, the agent first analyzes the nature of the problem, breaks it into subproblems, and designs precise retrieval strategies for each subproblem. After receiving initial results, it evaluates them, judges whether the information is complete and relevant, identifies knowledge gaps, and dynamically generates more precise new queries. This iterative process often includes multi-hop retrieval, where one round of results reveals new directions for the next round, producing a knowledge exploration chain similar to how a human researcher works.
|
||||
|
||||
To support this ongoing, iterative intelligent behavior, especially when personalization and long-term knowledge accumulation matter, short-term conversation context alone is far from enough. This leads to the need for long-term, structured memory.
|
||||
|
||||
That is exactly why RAG is increasingly assigned the role of an agent's long-term memory system and used to build a full external memory architecture. This long-term memory complements short-term memory, which is responsible for maintaining the current dialogue context. The long-term memory system relies on three key mechanisms:
|
||||
|
||||
1. Structured indexing ability:
|
||||
This allows the agent to build multi-dimensional indexes over huge amounts of unstructured data, by time, topic, entity relations, and more, supporting efficient retrieval from multiple angles much like humans recall information through different clues.
|
||||
2. Intelligent forgetting:
|
||||
Through value-evaluation algorithms, the system can decay or selectively discard low-frequency, weakly related, or outdated information, keeping the memory system lean and efficient and preventing overload.
|
||||
3. Knowledge consolidation:
|
||||
The system refines scattered dialogue and interaction experience into structured knowledge. Through entity recognition, relation extraction, and semantic clustering, fragmented information is connected into knowledge graphs, turning short-term experience into long-term knowledge.
|
||||
|
||||
This external memory system built on RAG not only expands an agent's cognitive boundary significantly, but also gives it the ability to continue learning and evolving its knowledge. It allows the agent to accumulate experience over long-term interaction, form personalized operating patterns and domain knowledge systems, and support more complex and longer-running tasks.
|
||||
|
||||
# Summary
|
||||
|
||||
Retrieval-Augmented Generation is not only a technical method for compensating for hallucination and knowledge staleness in large models. It is also a key bridge for turning general AI capability into deep enterprise value. The evolution from Naive RAG to modular and agentic forms shows that every part of RAG needs to deepen continuously, including finer data handling, more scientific model selection across embedding, rerank, and LLM stages, and more systematic evaluation. All of these are necessary steps toward building enterprise knowledge systems that are controllable, trustworthy, and efficient. At the same time, drawing lessons from competitions and engineering case studies is one of the best ways to deepen understanding of the technical details.
|
||||
|
||||
As Graph RAG, multimodal understanding, and Late Chunking continue to develop and combine, RAG is steadily pushing beyond the old retrieval-and-generation boundary and moving toward deeper semantic association and more sustainable memory capability. The hope is that this survey-style article helps you build a full-chain methodology, from principle to practice and from evaluation to evolution, so that in a fast-moving technical landscape you can build high-quality intelligent applications that truly land in the real world and can handle complex business challenges.
|
||||
|
||||
# References
|
||||
|
||||
[1] Ask in Any Modality: A Comprehensive Survey on Multimodal Retrieval-Augmented Generation.
|
||||
|
||||
https://arxiv.org/pdf/2502.08826
|
||||
|
||||
[2] Retrieving Multimodal Information for Augmented Generation: A Survey.
|
||||
|
||||
https://arxiv.org/pdf/2303.10868
|
||||
|
||||
[3] A Survey on RAG Meeting LLMs: Towards Retrieval-Augmented Large Language Models.
|
||||
|
||||
https://arxiv.org/pdf/2405.06211
|
||||
|
||||
[4] Retrieval-Augmented Generation for Large Language Models: A Survey.
|
||||
|
||||
https://arxiv.org/pdf/2312.10997
|
||||
|
||||
[5] LightRAG: Simple and Fast Retrieval-Augmented Generation.
|
||||
|
||||
https://arxiv.org/pdf/2410.05779
|
||||
|
||||
[6] Agentic Retrieval-Augmented Generation: A Survey on Agentic RAG.
|
||||
|
||||
https://arxiv.org/pdf/2501.09136
|
||||
|
||||
[7] ERAGent: Enhancing Retrieval-Augmented Language Models with Improved Accuracy, Efficiency, and Personalization.
|
||||
|
||||
https://arxiv.org/pdf/2405.06683
|
||||
|
||||
[8] Graph Retrieval-Augmented Generation: A Survey.
|
||||
|
||||
https://arxiv.org/pdf/2408.08921
|
||||
|
||||
[9] Evaluation of Retrieval-Augmented Generation: A Survey.
|
||||
|
||||
https://arxiv.org/pdf/2405.07437
|
||||
|
||||
[10] Retrieval Augmented Generation Evaluation in the Era of Large Language Models: A Comprehensive Survey.
|
||||
|
||||
https://arxiv.org/pdf/2504.14891
|
||||
|
||||
[11] From Local to Global: A Graph RAG Approach to Query-Focused Summarization.
|
||||
|
||||
https://arxiv.org/pdf/2404.16130
|
||||
|
||||
[12] RAG vs. GraphRAG: A Systematic Evaluation and Key Insights.
|
||||
|
||||
https://arxiv.org/pdf/2502.11371
|
||||
|
||||
[13] Introduction to RAG | LlamaIndex Python Documentation.
|
||||
|
||||
https://developers.llamaindex.ai/python/framework/understanding/rag/
|
||||
|
||||
[14] All-in-RAG | A Full-Stack Guide to RAG in Large-Model Application Development.
|
||||
|
||||
https://datawhalechina.github.io/all-in-rag/#/en/
|
||||
|
||||
[15] Ilya Rice: How I Won the Enterprise RAG Challenge.
|
||||
|
||||
https://abdullin.com/ilya/how-to-build-best-rag/
|
||||
|
||||
[16] RAG Research Table - Awesome Generative AI Guide (GitHub).
|
||||
|
||||
https://github.com/aishwaryanr/awesome-generative-ai-guide/blob/main/research_updates/rag_research_table.md
|
||||
|
||||
[17] RAG is dead, long live agentic retrieval.
|
||||
|
||||
https://www.llamaindex.ai/blog/rag-is-dead-long-live-agentic-retrieval
|
||||
|
||||
[18] LLM/RAG Zoomcamp extra lesson 5: Common evaluation methods and market preferences in RAG evolution.
|
||||
|
||||
https://vip.studycamp.tw/t/llmrag-zoomcamp-%E8%AA%B2%E5%A4%96%E8%A3%9C%E5%85%85-5%EF%BC%9Arag-evolution-%E5%B8%B8%E8%A6%8B%E8%A9%95%E4%BC%B0%E6%96%B9%E6%B3%95%E5%92%8C%E5%B8%82%E5%A0%B4%E5%81%8F%E5%A5%BD/8185
|
||||
|
||||
[19] How to Evaluate Retrieval Augmented Generation (RAG) Applications.
|
||||
|
||||
https://zilliz.com.cn/blog/how-to-evaluate-rag-zilliz
|
||||
|
||||
[20] RAG is not Agent Memory.
|
||||
|
||||
https://www.letta.com/blog/rag-vs-agent-memory
|
||||
|
||||
[21] Richmond Alake. LinkedIn post on #100DaysOfAgentMemory, RAG and MemoRizz.
|
||||
|
||||
https://www.linkedin.com/posts/richmondalake_100daysofagentmemory-rag-memorizz-activity-7348281860843577346-LM7Y/
|
||||
@@ -0,0 +1,3 @@
|
||||
# Intermediate and Advanced RAG with Workflow Orchestration - Using LangGraph as an Example
|
||||
|
||||
> This chapter is currently being written. Stay tuned...
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,706 @@
|
||||
# Claude Agent SDK Complete Guide
|
||||
|
||||
## Introduction
|
||||
|
||||
You may already have used Claude's basic API: send one message, get one reply, just like chatting. But if you want Claude to help you read files, run commands, search code, fix bugs, verify the result itself, and continue iterating, this kind of "autonomous work" is not something the basic API can do.
|
||||
|
||||
Claude Agent SDK is built exactly for this scenario. It packages all of Claude Code's capabilities - reading and writing files, executing commands, searching code, editing files, browsing the web - into a programmable library. You do not need to write the tool-calling loop yourself. Claude can execute tools autonomously and iterate autonomously until the task is truly completed.
|
||||
|
||||
One-sentence summary: the basic SDK is "you ask, it answers"; the Agent SDK is "you assign, it works."
|
||||
|
||||
---
|
||||
|
||||
## What Is the Difference from the Basic SDK?
|
||||
|
||||
Look at the code first, and the difference is obvious:
|
||||
|
||||
```python
|
||||
# Basic anthropic SDK: you must write your own loop to handle tool calls
|
||||
import anthropic
|
||||
|
||||
client = anthropic.Anthropic()
|
||||
response = client.messages.create(
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "Fix the bug in auth.py"}],
|
||||
tools=[...] # You must define tools yourself
|
||||
)
|
||||
# Claude asks to call some tool
|
||||
while response.stop_reason == "tool_use":
|
||||
result = your_tool_executor(response.tool_use) # You must execute it yourself
|
||||
response = client.messages.create(tool_result=result, **params) # You must feed it back yourself
|
||||
```
|
||||
|
||||
```python
|
||||
# Agent SDK: one block and done, Claude reads files, finds bugs, and edits code by itself
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions
|
||||
|
||||
async for message in query(
|
||||
prompt="Fix the bug in auth.py",
|
||||
options=ClaudeAgentOptions(allowed_tools=["Read", "Edit", "Bash"]),
|
||||
):
|
||||
print(message) # Claude reads files, locates issues, and edits code by itself
|
||||
```
|
||||
|
||||
The difference is clear:
|
||||
|
||||
| Comparison Item | Basic anthropic SDK | Claude Agent SDK |
|
||||
|--------|-------------------|-----------------|
|
||||
| Tool execution | You implement it | Claude handles it |
|
||||
| Tool loop | You implement it | Built-in agent loop |
|
||||
| Built-in tools | None, all self-defined | Read/write files, Bash, search, and more out of the box |
|
||||
| Context management | You maintain it | Auto compression and auto management |
|
||||
| Best for | Chat, generation, simple tool use | Autonomously completing complex tasks |
|
||||
|
||||
---
|
||||
|
||||
## How Is It Different from Other Agent Frameworks?
|
||||
|
||||
There are many Agent frameworks on the market - LangChain, LlamaIndex, CrewAI, AutoGPT, and more. What is unique about Claude Agent SDK compared with them?
|
||||
|
||||
> 📚 **For a detailed comparison, see the appendix**: [Mainstream Agent Framework Comparison](/en/appendix/8-artificial-intelligence/ai-agents.html)
|
||||
|
||||
In short:
|
||||
|
||||
| Framework | Best-Fit Scenario |
|
||||
|------|-------------|
|
||||
| **Claude Agent SDK** | Let Claude autonomously complete coding, file operations, and command execution |
|
||||
| **LangChain** | Build complex general AI apps with highly customized flows |
|
||||
| **CrewAI** | Simulate multi-role collaboration scenarios (virtual teams, role-playing) |
|
||||
| **LlamaIndex** | Build knowledge-base QA systems that connect enterprise data with LLMs |
|
||||
|
||||
---
|
||||
|
||||
## Installation and Configuration
|
||||
|
||||
### Installation
|
||||
|
||||
The Python SDK requires Python 3.10+, and the TypeScript SDK requires Node.js 18+:
|
||||
|
||||
```bash
|
||||
# Python
|
||||
pip install claude-agent-sdk
|
||||
|
||||
# TypeScript
|
||||
npm install @anthropic-ai/claude-agent-sdk
|
||||
```
|
||||
|
||||
### Authentication
|
||||
|
||||
Just set the API key environment variable:
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_API_KEY=your-api-key
|
||||
```
|
||||
|
||||
Cloud-platform authentication is also supported:
|
||||
- AWS Bedrock: set `CLAUDE_CODE_USE_BEDROCK=1` + AWS credentials
|
||||
- Google Vertex AI: set `CLAUDE_CODE_USE_VERTEX=1` + GCP credentials
|
||||
- Microsoft Azure: set `CLAUDE_CODE_USE_FOUNDRY=1` + Azure credentials
|
||||
|
||||
### Custom API Endpoint
|
||||
|
||||
If you use a proxy, gateway, or self-hosted API endpoint, you can change the default API URL through the `env` parameter:
|
||||
|
||||
```python
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions
|
||||
|
||||
async for message in query(
|
||||
prompt="Hello",
|
||||
options=ClaudeAgentOptions(
|
||||
env={
|
||||
"ANTHROPIC_BASE_URL": "https://your-proxy.example.com",
|
||||
"ANTHROPIC_API_KEY": "your-api-key",
|
||||
}
|
||||
),
|
||||
):
|
||||
print(message)
|
||||
```
|
||||
|
||||
`ClaudeAgentOptions` does not have a direct `base_url` parameter, but the `env` field can pass arbitrary environment variables into the underlying Claude Code CLI. Common environment variables:
|
||||
|
||||
| Environment Variable | Purpose |
|
||||
|---------|------|
|
||||
| `ANTHROPIC_BASE_URL` | Custom API endpoint (proxy, gateway) |
|
||||
| `ANTHROPIC_API_KEY` | API key |
|
||||
| `ANTHROPIC_AUTH_TOKEN` | Alternative auth token |
|
||||
| `ANTHROPIC_CUSTOM_HEADERS` | Custom request headers |
|
||||
|
||||
---
|
||||
|
||||
## Core Concepts
|
||||
|
||||
The Agent SDK runtime principle can be summarized in one sentence: **collect context -> execute actions -> verify results -> repeat**.
|
||||
|
||||
This is exactly how human developers work: read code first, then modify code, then run tests and check results. If it is wrong, keep iterating. Agent SDK automates this loop.
|
||||
|
||||
### Two Usage Modes
|
||||
|
||||
**Mode 1: `query()` function - stateless, suitable for one-off tasks**
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions
|
||||
|
||||
async def main():
|
||||
async for message in query(
|
||||
prompt="What files are in this directory?",
|
||||
options=ClaudeAgentOptions(allowed_tools=["Bash", "Glob"]),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
print(message.result)
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
**Mode 2: `ClaudeSDKClient` - stateful, suitable for multi-turn conversation**
|
||||
|
||||
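Use this when you need to preserve context and interact across multiple turns. For example, first ask Claude to read one module, then ask it to find all call sites of that module - in the second turn it still remembers what it read in the first turn. The example below shows the same multi-turn effect using session resumption: it captures the `session_id` from the first `query()` call's `init` message and passes it as `resume` to the second call.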
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions
|
||||
|
||||
async def main():
|
||||
session_id = None
|
||||
|
||||
# Turn 1: read the auth module
|
||||
async for message in query(
|
||||
prompt="Read the authentication module code",
|
||||
options=ClaudeAgentOptions(allowed_tools=["Read", "Glob"]),
|
||||
):
|
||||
if hasattr(message, "subtype") and message.subtype == "init":
|
||||
session_id = message.session_id
|
||||
|
||||
# Turn 2: continue based on previous context
|
||||
async for message in query(
|
||||
prompt="Find all places that call it",
|
||||
options=ClaudeAgentOptions(resume=session_id),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
print(message.result)
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Built-in Tools: Ready to Use
|
||||
|
||||
This is one of the best parts of Agent SDK - you do not need to implement any tools yourself, Claude can use them directly:
|
||||
|
||||
| Tool | Capability | Typical Use |
|
||||
|------|------|---------|
|
||||
| Read | Read files | View code, read configs |
|
||||
| Write | Create files | Generate new files |
|
||||
| Edit | Precise file edits | Bug fixes, refactoring |
|
||||
| Bash | Run terminal commands | Run tests, install dependencies, git operations |
|
||||
| Glob | Pattern-based file search | `**/*.py`, `src/**/*.ts` |
|
||||
| Grep | Regex content search | Find function definitions, TODOs |
|
||||
| WebSearch | Search web pages | Look up docs, find approaches |
|
||||
| WebFetch | Fetch web content | Read online docs |
|
||||
| Task | Launch sub-agents | Parallelize sub-tasks |
|
||||
|
||||
Use `allowed_tools` to control which tools the agent can use:
|
||||
|
||||
```python
|
||||
# Read-only agent: can inspect but cannot modify
|
||||
options = ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Glob", "Grep"],
|
||||
permission_mode="bypassPermissions"
|
||||
)
|
||||
|
||||
# Full agent: can read, write, and execute commands
|
||||
options = ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Write", "Edit", "Bash", "Glob", "Grep"]
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Advanced Features
|
||||
|
||||
### Hooks: Insert Your Own Logic at Key Points
|
||||
|
||||
Hooks let you inject custom code at critical moments of agent execution - for example, logging, intercepting risky operations, and auditing file changes.
|
||||
|
||||
Supported hook types include: `PreToolUse` (before tool execution), `PostToolUse` (after tool execution), `Stop` (when the agent stops), `SessionStart`, `SessionEnd`, and more.
|
||||
|
||||
```python
|
||||
from datetime import datetime
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions, HookMatcher
|
||||
|
||||
# Record an audit log every time a file is modified
|
||||
async def log_file_change(input_data, tool_use_id, context):
|
||||
file_path = input_data.get("tool_input", {}).get("file_path", "unknown")
|
||||
with open("./audit.log", "a") as f:
|
||||
f.write(f"{datetime.now()}: modified {file_path}\n")
|
||||
return {}
|
||||
|
||||
async def main():
|
||||
async for message in query(
|
||||
prompt="Refactor utils.py for better readability",
|
||||
options=ClaudeAgentOptions(
|
||||
permission_mode="acceptEdits",
|
||||
hooks={
|
||||
"PostToolUse": [
|
||||
HookMatcher(matcher="Edit|Write", hooks=[log_file_change])
|
||||
]
|
||||
},
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
print(message.result)
|
||||
```
|
||||
|
||||
Real-world uses:
|
||||
- Audit logging: record every operation performed by the agent
|
||||
- Security interception: block modifications to critical files
|
||||
- Notification push: send messages when agent tasks complete
|
||||
- Cost monitoring: count tool calls and token usage
|
||||
|
||||
### Sub-Agents: Split Big Tasks Across Specialists
|
||||
|
||||
When a task is complex enough, you can define multiple specialized sub-agents and let the main agent delegate sub-tasks to them. Each sub-agent has its own instructions and tool permissions, isolated from each other.
|
||||
|
||||
```python
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions, AgentDefinition
|
||||
|
||||
async for message in query(
|
||||
prompt="Use the code-reviewer agent to review this project's code quality",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Glob", "Grep", "Task"],
|
||||
agents={
|
||||
"code-reviewer": AgentDefinition(
|
||||
description="Professional code reviewer responsible for quality and security reviews",
|
||||
prompt="Analyze code quality, identify potential issues, and provide improvement suggestions.",
|
||||
tools=["Read", "Glob", "Grep"],
|
||||
),
|
||||
"test-writer": AgentDefinition(
|
||||
description="Testing specialist responsible for writing unit tests",
|
||||
prompt="Write unit tests for functions that are missing tests.",
|
||||
tools=["Read", "Write", "Bash"],
|
||||
),
|
||||
},
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
print(message.result)
|
||||
```
|
||||
|
||||
Messages from sub-agents include a `parent_tool_use_id` field, making it easy to track which messages came from which sub-agent.
|
||||
|
||||
### MCP Integration: Connect to the Outside World
|
||||
|
||||
Through Model Context Protocol (MCP), your agent can connect to external systems such as databases, browsers, and third-party APIs. The community already provides [hundreds of MCP servers](https://github.com/modelcontextprotocol/servers) you can use directly.
|
||||
|
||||
```python
|
||||
# Connect Playwright so the agent can operate a browser
|
||||
async for message in query(
|
||||
prompt="Open example.com and describe what you see",
|
||||
options=ClaudeAgentOptions(
|
||||
mcp_servers={
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
"args": ["@playwright/mcp@latest"]
|
||||
}
|
||||
}
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
print(message.result)
|
||||
```
|
||||
|
||||
Common MCP integration scenarios:
|
||||
- Playwright: browser automation, scraping pages, filling forms
|
||||
- PostgreSQL/MySQL: direct database querying and operations
|
||||
- Slack/Email: sending notifications and messages
|
||||
- GitHub: operating PRs, Issues, and repositories
|
||||
|
||||
---
|
||||
|
||||
## What Can You Build with It? Practical Scenarios
|
||||
|
||||
After understanding features, the most important question is: what can this actually do? Below are real scenarios validated by the community.
|
||||
|
||||
### Scenario 1: Automatic Bug-Fix Agent
|
||||
|
||||
Give it a bug description, and it can find code, locate the issue, fix it, and run tests to verify:
|
||||
|
||||
```python
|
||||
async for message in query(
|
||||
prompt="Users report occasional HTTP 500 errors during login. Investigate and fix code under src/auth/",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Edit", "Bash", "Glob", "Grep"],
|
||||
permission_mode="acceptEdits",
|
||||
),
|
||||
):
|
||||
print(message)
|
||||
```
|
||||
|
||||
Claude will grep logs, read related code, find the bug, modify code, and run tests to confirm the fix.
|
||||
|
||||
### Scenario 2: Code Review Agent
|
||||
|
||||
Build a read-only code review agent that audits quality without making any modifications:
|
||||
|
||||
```python
|
||||
async for message in query(
|
||||
prompt="Review code under src/ with focus on security vulnerabilities, performance issues, and coding conventions",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Glob", "Grep"],
|
||||
permission_mode="bypassPermissions",
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
print(message.result)
|
||||
```
|
||||
|
||||
### Scenario 3: CI/CD Integration
|
||||
|
||||
In a CI pipeline, let the agent analyze failing tests and attempt automatic fixes:
|
||||
|
||||
```python
|
||||
async for message in query(
|
||||
prompt="Run npm test, analyze failing test cases, and fix the code so all tests pass",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Edit", "Bash", "Glob"],
|
||||
max_turns=20,
|
||||
),
|
||||
):
|
||||
print(message)
|
||||
```
|
||||
|
||||
This is a major advantage of the Agent SDK over the CLI: the CLI works well when a human is sitting at the terminal, while the SDK is ideal for embedding into automated workflows.
|
||||
|
||||
### Scenario 4: Research Agent
|
||||
|
||||
Let the agent search the web, read documentation, synthesize information, and produce a report:
|
||||
|
||||
```python
|
||||
async for message in query(
|
||||
prompt="Research mainstream Python Web frameworks in 2026. Compare FastAPI, Django, and Litestar, then write a technical selection report to report.md",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["WebSearch", "WebFetch", "Write"],
|
||||
),
|
||||
):
|
||||
print(message)
|
||||
```
|
||||
|
||||
### Scenario 5: Full-Stack Agent with Browser Capability
|
||||
|
||||
By connecting Playwright through MCP, the agent can not only write code but also open a browser to verify results:
|
||||
|
||||
```python
|
||||
async for message in query(
|
||||
prompt="Fix the homepage style issue, then open a browser and take screenshots to verify the result",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Edit", "Bash"],
|
||||
mcp_servers={
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
"args": ["@playwright/mcp@latest"]
|
||||
}
|
||||
},
|
||||
),
|
||||
):
|
||||
print(message)
|
||||
```
|
||||
|
||||
### Scenario Quick Reference
|
||||
|
||||
| Scenario | Core Tools | Difficulty |
|
||||
|------|---------|------|
|
||||
| Auto bug fixing | Read, Edit, Bash, Grep | Beginner |
|
||||
| Code review | Read, Glob, Grep | Beginner |
|
||||
| CI/CD auto-fix | Read, Edit, Bash | Intermediate |
|
||||
| Technical research report | WebSearch, WebFetch, Write | Beginner |
|
||||
| Browser automation | MCP (Playwright) | Intermediate |
|
||||
| Multi-agent collaboration | Task + AgentDefinition | Advanced |
|
||||
| Database operations | MCP (PostgreSQL/MySQL) | Intermediate |
|
||||
| Email/notification assistant | MCP (Slack/Email) | Intermediate |
|
||||
|
||||
---
|
||||
|
||||
## When Should You Use Agent SDK?
|
||||
|
||||
Not every scenario needs Agent SDK. Choosing the right tool matters:
|
||||
|
||||
| What You Want to Do | Recommended Tool |
|
||||
|-----------|---------|
|
||||
| Simple chat, text generation, translation | Basic `anthropic` SDK |
|
||||
| One-shot tool use (weather lookup, arithmetic) | Basic `anthropic` SDK |
|
||||
| Autonomously complete multi-step development tasks | Agent SDK |
|
||||
| Embed into CI/CD pipelines | Agent SDK |
|
||||
| Build apps that operate on a file system | Agent SDK |
|
||||
| Daily interactive development | Claude Code CLI |
|
||||
| One-off quick tasks | Claude Code CLI |
|
||||
|
||||
In short: if your task requires Claude to "work hands-on" by itself (reading files, editing code, running commands), use Agent SDK. If you only need Q&A, the basic SDK is enough.
|
||||
|
||||
---
|
||||
|
||||
## Enterprise Practice: Building a Code-Quality Guardrail Pipeline
|
||||
|
||||
The previous scenarios all used one agent for one job. In real enterprise environments, what you need is a full pipeline - multiple agents chained together, each stage with clear input/output, plus auditing, rollback, and notifications.
|
||||
|
||||
Now we will build a real scenario: after each PR submission, automatically trigger **code review -> security scan -> auto-fix -> test verification -> report generation** as a complete pipeline.
|
||||
|
||||
### Architecture Design
|
||||
|
||||
```text
|
||||
PR submitted
|
||||
│
|
||||
▼
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Code Review │───▶│ Security Scan│───▶│ Auto Fix │
|
||||
│ Agent │ │ Agent │ │ Agent │
|
||||
│ (read-only) │ │ (read-only) │ │ (writable) │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────┐ ┌─────────────┐
|
||||
│ Test Verify │───▶│ Report Build │
|
||||
│ Agent │ │ Agent │
|
||||
│ (Bash) │ │ (Write) │
|
||||
└─────────────┘ └─────────────┘
|
||||
│
|
||||
▼
|
||||
Slack notification
|
||||
```
|
||||
|
||||
Core idea: **each agent does one thing, permissions are minimized, and results are passed in sequence**.
|
||||
|
||||
### Step 1: Define the Pipeline Framework
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import datetime
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions, HookMatcher
|
||||
|
||||
# Audit log: record every operation by every agent
|
||||
audit_log = []
|
||||
|
||||
async def audit_hook(input_data, tool_use_id, context):
|
||||
audit_log.append({
|
||||
"time": datetime.now().isoformat(),
|
||||
"tool": input_data.get("tool_name"),
|
||||
"input": input_data.get("tool_input", {}),
|
||||
})
|
||||
return {}
|
||||
|
||||
# Shared hook config: all agents share audit capability
|
||||
audit_hooks = {
|
||||
"PostToolUse": [HookMatcher(matcher=".*", hooks=[audit_hook])]
|
||||
}
|
||||
```
|
||||
|
||||
### Step 2: Code Review Agent (Read-Only)
|
||||
|
||||
```python
|
||||
async def run_code_review(pr_diff: str) -> str:
|
||||
"""Read-only agent, reviews code quality and outputs a structured report"""
|
||||
result_text = ""
|
||||
async for message in query(
|
||||
prompt=f"""Review the following PR diff from these dimensions:
|
||||
1. Code conventions: naming, formatting, comments
|
||||
2. Logic issues: edge cases, null pointer risks, race conditions
|
||||
3. Performance risks: N+1 queries, memory leaks, unnecessary loops
|
||||
4. Maintainability: oversized functions, unclear responsibilities, magic numbers
|
||||
|
||||
PR Diff:
|
||||
{pr_diff}
|
||||
|
||||
Output JSON format: {{"issues": [{{"severity": "high/medium/low", "file": "...", "line": ..., "description": "..."}}], "summary": "..."}}""",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Glob", "Grep"],
|
||||
permission_mode="bypassPermissions",
|
||||
hooks=audit_hooks,
|
||||
max_turns=10,
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
result_text = message.result
|
||||
return result_text
|
||||
```
|
||||
|
||||
### Step 3: Security Scan Agent (Read-Only)
|
||||
|
||||
```python
|
||||
async def run_security_scan() -> str:
|
||||
"""Read-only agent focused on vulnerability scanning"""
|
||||
result_text = ""
|
||||
async for message in query(
|
||||
prompt="""Scan the project code for security vulnerabilities:
|
||||
1. SQL injection, XSS, CSRF
|
||||
2. Hardcoded keys or credentials
|
||||
3. Insecure dependency versions
|
||||
4. Missing permission checks
|
||||
|
||||
Output JSON: {"vulnerabilities": [{"severity": "critical/high/medium", "type": "...", "file": "...", "description": "...", "fix_suggestion": "..."}]}""",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Glob", "Grep", "Bash"],
|
||||
permission_mode="bypassPermissions",
|
||||
hooks=audit_hooks,
|
||||
max_turns=15,
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
result_text = message.result
|
||||
return result_text
|
||||
```
|
||||
|
||||
### Step 4: Auto-Fix Agent (Writable)
|
||||
|
||||
```python
|
||||
async def run_auto_fix(review_result: str, security_result: str) -> str:
|
||||
"""Writable agent that auto-fixes code based on review and scan results"""
|
||||
result_text = ""
|
||||
async for message in query(
|
||||
prompt=f"""Fix code according to the following review results:
|
||||
|
||||
Code review report:
|
||||
{review_result}
|
||||
|
||||
Security scan report:
|
||||
{security_result}
|
||||
|
||||
Fix rules:
|
||||
1. Only fix issues with severity high or critical
|
||||
2. Run related tests after each change to ensure no existing functionality is broken
|
||||
3. Do not refactor unrelated code, apply minimal fixes only
|
||||
4. Output the list of modified files after completion""",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Edit", "Bash", "Glob", "Grep"],
|
||||
permission_mode="acceptEdits",
|
||||
hooks=audit_hooks,
|
||||
max_turns=30,
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
result_text = message.result
|
||||
return result_text
|
||||
```
|
||||
|
||||
### Step 5: Test Verification + Report Generation
|
||||
|
||||
```python
|
||||
async def run_test_and_report(fix_result: str) -> str:
|
||||
"""Run tests and generate final report"""
|
||||
result_text = ""
|
||||
async for message in query(
|
||||
prompt=f"""Execute these actions:
|
||||
1. Run the full test suite (npm test or pytest)
|
||||
2. Compute test pass rate
|
||||
3. Generate a Markdown quality report into pr-report.md, including:
|
||||
- Count of issues found in code review and severity distribution
|
||||
- Number of security vulnerabilities
|
||||
- Auto-fix changes: {fix_result}
|
||||
- Test pass rate
|
||||
- Final conclusion: whether merge is recommended""",
|
||||
options=ClaudeAgentOptions(
|
||||
allowed_tools=["Read", "Bash", "Write", "Glob"],
|
||||
hooks=audit_hooks,
|
||||
max_turns=15,
|
||||
),
|
||||
):
|
||||
if hasattr(message, "result"):
|
||||
result_text = message.result
|
||||
return result_text
|
||||
```
|
||||
|
||||
### Step 6: Chain the Whole Pipeline
|
||||
|
||||
```python
|
||||
import subprocess
|
||||
|
||||
async def run_pipeline():
|
||||
"""Full PR quality-guard pipeline"""
|
||||
print("🔍 Stage 1/4: code review...")
|
||||
pr_diff = subprocess.run(
|
||||
["git", "diff", "main...HEAD"], capture_output=True, text=True
|
||||
).stdout
|
||||
review_result = await run_code_review(pr_diff)
|
||||
|
||||
print("🛡️ Stage 2/4: security scan...")
|
||||
security_result = await run_security_scan()
|
||||
|
||||
print("🔧 Stage 3/4: auto-fix...")
|
||||
fix_result = await run_auto_fix(review_result, security_result)
|
||||
|
||||
print("✅ Stage 4/4: test verification + report generation...")
|
||||
report = await run_test_and_report(fix_result)
|
||||
|
||||
# Save audit log
|
||||
with open("audit-log.json", "w") as f:
|
||||
json.dump(audit_log, f, indent=2, ensure_ascii=False)
|
||||
|
||||
print(f"Pipeline finished, audit log saved ({len(audit_log)} operation records)")
|
||||
return report
|
||||
|
||||
asyncio.run(run_pipeline())
|
||||
```
|
||||
|
||||
### Enterprise Design Thinking
|
||||
|
||||
This pipeline reflects several key enterprise design principles:
|
||||
|
||||
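**Least privilege**: each agent's `allowed_tools` list contains only what its stage needs. The code-review agent gets the read-only tools `Read`, `Glob`, and `Grep`; the security-scan agent gets the same set plus `Bash`. `Edit` is granted only to the auto-fix agent (running under `acceptEdits`), and `Write` only to the test-and-report stage. Keep in mind that `Bash` can still modify files, so a stage that must be strictly read-only should not be given it.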
|
||||
|
||||
**Auditable**: every step of every agent is logged through Hooks. If anything goes wrong, you can trace which agent did what and when.
|
||||
|
||||
**Result chaining**: each agent's output becomes the next agent's input. Review results feed auto-fix; auto-fix results feed test verification. Every stage has a clear input/output contract.
|
||||
|
||||
**Cost control**: every agent has a `max_turns` limit to prevent runaway loops. In production, you can also add `max_budget_usd` for budget control.
|
||||
|
||||
**Extensibility**: want another stage, such as a "documentation-check agent" or "performance benchmark agent"? Add a new function and insert it into the pipeline.
|
||||
|
||||
This model can be embedded directly into GitHub Actions or GitLab CI, automatically triggered on each PR, truly achieving "AI-driven code quality guardrails."
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
Agent SDK provides clear exception types so you can build robust fault tolerance in production:
|
||||
|
||||
```python
|
||||
from claude_agent_sdk import query, CLINotFoundError, ProcessError
|
||||
|
||||
try:
|
||||
async for msg in query(prompt="Analyze code"):
|
||||
print(msg)
|
||||
except CLINotFoundError:
|
||||
print("Claude Code CLI is not installed. Please install it first.")
|
||||
except ProcessError as e:
|
||||
print(f"Process exited unexpectedly with exit code: {e.exit_code}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
The core value of Claude Agent SDK is upgrading "model reasoning" into "controlled execution." It does not just generate text. It can truly complete tasks inside an auditable, constrained tool system.
|
||||
|
||||
Remember a line from Anthropic's official blog: the Agent SDK design philosophy is "give the agent a computer and let it work like a human."
|
||||
|
||||
A good agent application = clear tool design + explicit task boundaries + appropriate human supervision. Tools give the agent capability, boundaries give it constraints, and supervision gives you confidence. None of the three can be missing.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Official Resources
|
||||
|
||||
- [Agent SDK Official Docs](https://platform.claude.com/docs/en/agent-sdk/overview) - the most authoritative reference
|
||||
- [GitHub - claude-agent-sdk-python](https://github.com/anthropics/claude-agent-sdk-python) - Python SDK source
|
||||
- [GitHub - claude-agent-sdk-typescript](https://github.com/anthropics/claude-agent-sdk-typescript) - TypeScript SDK source
|
||||
- [Agent SDK Demo Projects](https://github.com/anthropics/claude-agent-sdk-demos) - email assistant, research agent, and more
|
||||
|
||||
### Blogs and Tutorials
|
||||
|
||||
- [Building agents with the Claude Agent SDK](https://claude.com/blog/building-agents-with-the-claude-agent-sdk) - Anthropic engineering blog on design philosophy and architecture
|
||||
- [Claude Agent SDK Python Study Guide](https://redreamality.com/blog/claude-agent-sdk-python-) - Chinese-friendly full tutorial from zero
|
||||
- [Claude Agent SDK Full Tutorial](https://blog.wenhaofree.com/en/posts/articles/claude-agent-sdk-tutorial/) - practical guide to tool systems, Agent Loop, and controlled execution
|
||||
- [12 Practical Agent SDK Scenarios](https://skywork.ai/blog/claude-agent-sdk-use-cases-2025/) - covers coding, data, automation, and more
|
||||
- [Step-by-Step Agent Tutorial](https://skywork.ai/blog/how-to-use-claude-agent-sdk-step-by-step-ai-agent-tutorial/) - TypeScript + Python dual-track tutorial
|
||||
@@ -0,0 +1,776 @@
|
||||
# How to Make Claude Code Work for Long Durations
|
||||
|
||||
## Introduction
|
||||
|
||||
Traditional AI coding assistants are "conversational": you say one thing, it replies once, and then stops. But for real development tasks, this mode is far from enough.
|
||||
|
||||
Imagine these scenarios: you want Claude to refactor an entire project, but it edits a few files and says "done"; you want Claude to keep fixing bugs until all tests pass, but it runs once and stops; you want Claude to "work overnight," but next morning you find it stopped long ago.
|
||||
|
||||
In the summer of 2025, an Australian developer named Geoffrey Huntley (who is also a sheep farmer) wrote a 5-line bash script. The script was simple: continuously restart Claude Code and feed it the same task. He named it "Ralph Wiggum," after the Simpsons character who keeps trying and never gives up.
|
||||
|
||||
This simple script shocked Silicon Valley. In just two weeks, related projects got 7,000+ GitHub stars. People used it to generate 6 complete projects overnight, delivered $50,000 contract work with only $297 API cost, and even used it to build a complete programming language in 3 months.
|
||||
|
||||
The core question this chapter solves is: how to make Claude Code work continuously like a real developer until tasks are truly complete.
|
||||
|
||||
---
|
||||
|
||||
## Core Principle: Why Does AI "Stop Too Early"?
|
||||
|
||||
Before discussing specific methods, first understand the root cause.
|
||||
|
||||
### AI's completion judgment is unreliable
|
||||
|
||||
LLMs have a fundamental weakness: they cannot reliably judge whether work is truly complete.
|
||||
|
||||
Human completion criteria are objective: all tests pass, features are complete, and code quality meets standards. But AI can only judge by "feeling." It may stop because "this looks about right," or because "output seems enough," or because it does not know what to do next.
|
||||
|
||||
That is why we need an external system to determine real completion rather than relying on AI's internal sense.
|
||||
|
||||
### The core idea of the solution
|
||||
|
||||
The core solution is to keep AI working inside a "loop."
|
||||
|
||||
Whenever it tries to exit, the external system checks three questions: is it truly complete? does it meet objective criteria? is anything missing? If not, inject the task again and continue another round.
|
||||
|
||||
This idea can be implemented in many forms, from simple bash scripts to complex orchestration systems, but the essence is the same.
|
||||
|
||||
---
|
||||
|
||||
## Method 1: While True Bash Loop (Most Primitive Method)
|
||||
|
||||
This is the simplest and most direct implementation. Essentially, write an infinite loop that restarts Claude Code each round and feeds the same task description.
|
||||
|
||||
The simplest implementation is only 5 lines:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
while true; do
|
||||
cat PROMPT.md | claude
|
||||
done
|
||||
```
|
||||
|
||||
### How it works
|
||||
|
||||
The script flow is straightforward. Step 1: read the task description from `PROMPT.md`. Step 2: launch Claude Code and pipe the task description in. Step 3: Claude works and outputs results. Step 4: Claude exits after finishing. Step 5: the loop automatically restarts and returns to step 1, creating an infinite cycle until you interrupt it manually with `Ctrl+C`.
|
||||
|
||||
### Pros and cons
|
||||
|
||||
The advantage is extreme simplicity: anyone can understand it, no configuration needed, immediately usable, and good for quick experiments.
|
||||
|
||||
But the disadvantages are obvious: it cannot judge real completion, it may spin forever, it has no safety guardrails, and it can waste API calls.
|
||||
|
||||
### Real usage example
|
||||
|
||||
First, create a `PROMPT.md` file to describe your task. For example, refactoring a user auth module:
|
||||
|
||||
```markdown
|
||||
# Task: Refactor user authentication module
|
||||
|
||||
Requirements:
|
||||
1. Extract all authentication logic into an independent AuthService class
|
||||
2. Add unit tests, coverage > 80%
|
||||
3. Update related documentation
|
||||
|
||||
When all tests pass and docs are updated, output: task complete
|
||||
```
|
||||
|
||||
Then create and run the loop script:
|
||||
|
||||
```bash
|
||||
chmod +x loop.sh
|
||||
./loop.sh
|
||||
```
|
||||
|
||||
### Safer improved version
|
||||
|
||||
To avoid endless loops, add an iteration cap:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
MAX_ITERATIONS=50
|
||||
iteration=0
|
||||
|
||||
while true; do
|
||||
iteration=$((iteration + 1))
|
||||
echo "=== Iteration $iteration/$MAX_ITERATIONS ==="
|
||||
|
||||
cat PROMPT.md | claude
|
||||
|
||||
if [ $iteration -ge $MAX_ITERATIONS ]; then
|
||||
echo "Reached maximum iterations, stopping"
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 5 # small delay to avoid API rate limits
|
||||
done
|
||||
```
|
||||
|
||||
This improved version adds a max-iteration limit, shows per-round progress, and stops automatically at the limit. It also adds a 5-second delay each loop to avoid rate limiting.
|
||||
|
||||
---
|
||||
|
||||
## Method 2: Ralph Wiggum Plugin (Official Recommendation)
|
||||
|
||||
Ralph Wiggum is an official Anthropic plugin built specifically for long-running tasks. It is named after the Simpsons character, representing the spirit of "keep trying despite failure."
|
||||
|
||||
### Core mechanism: Stop Hook
|
||||
|
||||
The core of Ralph is the Stop Hook. When Claude wants to exit, the Stop Hook intercepts the exit signal. Then the system checks: did the output include the specific completion marker? If no marker is found, it reinjects the original prompt and starts another iteration. Only when the completion marker is detected is Claude allowed to exit.
|
||||
|
||||
This guarantees Claude does not stop just because it "feels close enough." It must complete clearly marked requirements.
|
||||
|
||||
### Installation
|
||||
|
||||
Ralph Wiggum is an official Claude Code plugin and can be installed in two ways.
|
||||
|
||||
**Option 1: install from official plugin marketplace (recommended)**
|
||||
|
||||
```bash
|
||||
# run in Claude Code
|
||||
claude
|
||||
|
||||
# add official plugin marketplace
|
||||
/plugin marketplace add anthropics/claude-code
|
||||
|
||||
# install Ralph Wiggum
|
||||
/plugin install ralph-wiggum@claude-code-plugins
|
||||
|
||||
# verify installation
|
||||
/plugin
|
||||
```
|
||||
|
||||
**Option 2: install directly from GitHub**
|
||||
|
||||
```bash
|
||||
# enter plugin directory
|
||||
cd ~/.claude/plugins/
|
||||
|
||||
# clone plugin repo
|
||||
git clone https://github.com/anthropics/ralph-wiggum-plugin.git
|
||||
```
|
||||
|
||||
After installation, you can use:
|
||||
|
||||
- `/ralph-wiggum:ralph-loop` - start loop
|
||||
- `/ralph-wiggum:cancel-ralph` - cancel loop
|
||||
- `/ralph-wiggum:help` - show help
|
||||
|
||||
### Basic usage
|
||||
|
||||
Use `/ralph-wiggum:ralph-loop`:
|
||||
|
||||
```bash
|
||||
/ralph-wiggum:ralph-loop "Build a todo API with CRUD operations, input validation, and tests.
|
||||
Output <promise>COMPLETE</promise> when everything is done." \
|
||||
--max-iterations 50 \
|
||||
--completion-promise "COMPLETE"
|
||||
```
|
||||
|
||||
### Parameter explanation
|
||||
|
||||
The two most important parameters are `--max-iterations` and `--completion-promise`.
|
||||
|
||||
`--max-iterations` sets the hard safety cap. Recommended values are typically 20-100. Even if unfinished, Ralph stops at this limit to prevent infinite API spending.
|
||||
|
||||
`--completion-promise` specifies the completion marker text, which must be explicit and unique. Ralph treats the task as complete only when Claude output contains that marker. Use clear markers such as `COMPLETE` or `TASK_DONE`, and avoid ambiguous words.
|
||||
|
||||
### Prompt best practices
|
||||
|
||||
Writing good prompts is key to Ralph success.
|
||||
|
||||
Bad prompts usually do not define completion criteria. For example, "write a todo API" may lead AI to output a rough skeleton and stop, with no tests, no verification, and no docs.
|
||||
|
||||
Good prompts should include phased requirements and clear acceptance criteria. For example:
|
||||
|
||||
Describe phased tasks first. Phase 1 is core functionality with all CRUD endpoints: POST `/todos` create, GET `/todos` list, GET `/todos/:id` fetch single, PUT `/todos/:id` update, DELETE `/todos/:id` delete. Phase 2 is input validation: title cannot be empty, completion status must be boolean. Phase 3 is tests: write tests for each endpoint, with coverage > 80%.
|
||||
|
||||
Then define acceptance criteria: all tests pass, code passes linter, README includes API docs.
|
||||
|
||||
Finally define a unique completion marker: `<promise>TODO_API_COMPLETE</promise>`.
|
||||
|
||||
This way Claude knows exactly what to do and when completion is truly achieved.
|
||||
|
||||
### More prompt templates
|
||||
|
||||
Here are common task templates you can use directly or adapt.
|
||||
|
||||
**Template 1: test migration (Jest -> Vitest)**
|
||||
|
||||
```text
|
||||
/ralph-wiggum:ralph-loop "
|
||||
Migrate all tests in this project from Jest to Vitest:
|
||||
- Keep all test logic unchanged
|
||||
- Update config files (vite.config.js, vitest.config.js)
|
||||
- Replace Jest-specific APIs (e.g., jest.mock -> vi.mock)
|
||||
- Ensure all tests pass
|
||||
- Remove Jest-related dependencies
|
||||
|
||||
Acceptance criteria:
|
||||
- npm test passes fully
|
||||
- no Jest dependency in package.json
|
||||
- project builds successfully
|
||||
|
||||
Output after completion: <promise>VITEST_MIGRATION_COMPLETE</promise>
|
||||
" --max-iterations 40 --completion-promise "VITEST_MIGRATION_COMPLETE"
|
||||
```
|
||||
|
||||
**Template 2: UI/UX optimization (mobile-first)**
|
||||
|
||||
```text
|
||||
/ralph-wiggum:ralph-loop "
|
||||
Polish this project's UI/UX into a refined mobile-first language learning app:
|
||||
- unify spacing and whitespace (use 4px base unit)
|
||||
- establish clear type hierarchy (title/body/auxiliary text)
|
||||
- unify styles for cards, lists, and shared components
|
||||
- add bottom navigation (Home/Learn/Quiz/Progress/Settings)
|
||||
- ensure mobile rendering quality
|
||||
|
||||
Acceptance criteria:
|
||||
- npm run build succeeds
|
||||
- no TypeScript errors
|
||||
- key pages preview correctly on mobile
|
||||
|
||||
Output after completion: <promise>UI_UX_COMPLETE</promise>
|
||||
" --max-iterations 25 --completion-promise "UI_UX_COMPLETE"
|
||||
```
|
||||
|
||||
**Template 3: bulk TypeScript annotation**
|
||||
|
||||
```text
|
||||
/ralph-wiggum:ralph-loop "
|
||||
Add TypeScript type annotations to all functions in the project:
|
||||
- prioritize src/ directory
|
||||
- add types for function params and return values
|
||||
- avoid any, use concrete types or unknown
|
||||
- add necessary type definitions
|
||||
|
||||
Acceptance criteria:
|
||||
- npm run typecheck passes
|
||||
- no @ts-ignore or @ts-expect-error comments
|
||||
- code runs correctly
|
||||
|
||||
Output after completion: <promise>TYPES_ADDED</promise>
|
||||
" --max-iterations 30 --completion-promise "TYPES_ADDED"
|
||||
```
|
||||
|
||||
**Template 4: TDD-driven feature development**
|
||||
|
||||
```text
|
||||
/ralph-wiggum:ralph-loop "
|
||||
Implement checkout functionality using TDD:
|
||||
1. Write tests first (checkout.test.ts)
|
||||
2. Run tests (should fail)
|
||||
3. Write minimal code to pass tests
|
||||
4. Refactor and optimize
|
||||
5. Repeat until all tests pass
|
||||
|
||||
Feature requirements:
|
||||
- shopping cart item list
|
||||
- shipping fee calculation
|
||||
- coupon application
|
||||
- payment form validation
|
||||
|
||||
Acceptance criteria:
|
||||
- all tests pass (npm test checkout.test.ts)
|
||||
- code coverage > 80%
|
||||
- no ESLint errors
|
||||
|
||||
Output after completion: <promise>CHECKOUT_COMPLETE</promise>
|
||||
" --max-iterations 25 --completion-promise "CHECKOUT_COMPLETE"
|
||||
```
|
||||
|
||||
**Template 5: code style unification**
|
||||
|
||||
```text
|
||||
/ralph-wiggum:ralph-loop "
|
||||
Unify code style across the project:
|
||||
- format all files with Prettier
|
||||
- unify naming conventions (variables camelCase, components PascalCase)
|
||||
- remove unused imports and variables
|
||||
- unify string quotes (single quotes)
|
||||
- unify semicolon style (no semicolons)
|
||||
|
||||
Acceptance criteria:
|
||||
- npm run lint passes
|
||||
- consistent code style
|
||||
- build succeeds
|
||||
|
||||
Output after completion: <promise>STYLE_UNIFIED</promise>
|
||||
" --max-iterations 20 --completion-promise "STYLE_UNIFIED"
|
||||
```
|
||||
|
||||
### Real-world cases
|
||||
|
||||
One famous case happened at a Y Combinator hackathon, where a team used Ralph Loop. At 11 PM, they set a task: implement MVPs for 6 product specs in sequence and emit specific completion markers for each one. They set max iterations to 200 and went to sleep.
|
||||
|
||||
The next morning, they had 6 demo-ready projects, and API cost was only $297. That is Ralph's power: while you sleep, AI keeps working.
|
||||
|
||||
Another case came from Boris Cherny (Claude Code lead). With Ralph plus Opus 4.5, he delivered 259 PRs in 30 days, including 497 commits, adding 40,000 lines and deleting 38,000 lines. Most strikingly, all of it was produced by Claude Code without manually writing code.
|
||||
|
||||
An even wilder case is the CURSED programming language. Ralph creator Geoffrey Huntley used Ralph Loop over 3 months to autonomously build a full programming language. Its keywords use Gen Z slang (such as `slay`, `sus`, `based`), and more importantly it includes a full LLVM compiler implementation, standard library, and partial editor support. This demonstrates Ralph Loop's true potential: if you provide a clear target, it can keep working for months until a complex project is truly finished.
|
||||
|
||||
### More real-world cases
|
||||
|
||||
**Automated project refactor**
|
||||
|
||||
One developer used Ralph to refactor a legacy project with messy code, no tests, and missing documentation. The assigned tasks were:
|
||||
|
||||
1. Add tests for existing code
|
||||
2. Refactor step by step, ensuring tests pass after each change
|
||||
3. Update documentation
|
||||
|
||||
Ralph ran over a full weekend. By Monday, there were 47 commits, cleaner code structure, 75% test coverage, and complete API docs. Cost was around $12.
|
||||
|
||||
### Ralph philosophy
|
||||
|
||||
Ralph reflects three core philosophies.
|
||||
|
||||
The first is iteration over perfection. Do not expect perfection in one pass; use loops to improve. The first pass may only build a skeleton, the second fixes bugs, the third optimizes, and the fourth adds tests; every round gets better.
|
||||
|
||||
The second is failure as data. Every test failure is an opportunity to improve; do not fear failure, learn from it.
|
||||
|
||||
The third is persistent trying: keep trying until it works. That is Ralph spirit.
|
||||
|
||||
### When Ralph is suitable or unsuitable
|
||||
|
||||
Knowing where Ralph fits helps save both time and cost.
|
||||
|
||||
**Suitable scenarios for Ralph**
|
||||
|
||||
These tasks have clear completion criteria and are good for automatic iteration:
|
||||
|
||||
| Scenario | Why |
|
||||
|------|------|
|
||||
| Test migration | Clear target framework, validated by passing tests |
|
||||
| Large refactors | Specific refactor rules can be defined |
|
||||
| Framework migration | Successful migration is verifiable by working code |
|
||||
| Bulk type annotation | Done when typecheck passes |
|
||||
| Test coverage improvement | Coverage percentage is objective |
|
||||
| Documentation generation | API docs can be automatically validated |
|
||||
| UI/UX unification | Concrete design rules can be defined |
|
||||
| Bug fixes with repro | Pass condition is testable |
|
||||
|
||||
**Unsuitable scenarios for Ralph**
|
||||
|
||||
These tasks require human judgment or exploration:
|
||||
|
||||
| Scenario | Why |
|
||||
|------|------|
|
||||
| Architecture decisions | e.g., microservices vs monolith requires trade-off judgment |
|
||||
| Security-sensitive code | Vulnerabilities can be subtle and hard to detect automatically |
|
||||
| Ambiguous requirements | No clear completion criteria |
|
||||
| Exploratory work | Direction changes continuously |
|
||||
| Creative design | Requires human aesthetic judgment |
|
||||
| Simple one-off tasks | Using Ralph is overkill |
|
||||
|
||||
**Decision checklist**
|
||||
|
||||
Ask yourself three questions:
|
||||
1. **Can I define explicit completion criteria?** If not, not suitable
|
||||
2. **Is there an objective validation method?** (tests/build/typecheck) If not, not suitable
|
||||
3. **Does this task require continuous human feedback?** If yes, not suitable
|
||||
|
||||
If the answers are "yes," "yes," and "no" respectively, let Ralph run.
|
||||
|
||||
---
|
||||
|
||||
## Method 3: Enhanced Ralph
|
||||
|
||||
This is a community-enhanced implementation of official Ralph. The [frankbria/ralph-claude-code](https://github.com/frankbria/ralph-claude-code) project adds stronger safety mechanisms.
|
||||
|
||||
### Additional features
|
||||
|
||||
Enhanced Ralph adds several extra safety features.
|
||||
|
||||
First is dual exit conditions. Official Ralph checks only the completion marker, but the enhanced version requires both the completion marker and an explicit `EXIT_SIGNAL` before stopping. This means that even if Claude outputs the completion marker, the loop can continue for additional verification until the explicit exit signal appears.
|
||||
|
||||
Second is rate limiting. Default is 100 runs/hour, preventing runaway API bills if a bug causes endless loops. You can adjust this limit.
|
||||
|
||||
Third is a smart circuit breaker. If the system detects the completion marker 5 consecutive times, it force-stops. This guards against rare edge cases where the loop fails to terminate correctly.
|
||||
|
||||
Fourth is a real-time dashboard. Enhanced Ralph provides a command-line dashboard showing current iterations, task progress, and estimated cost.
|
||||
|
||||
### Installation
|
||||
|
||||
Install enhanced Ralph by cloning from GitHub:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/frankbria/ralph-claude-code.git
|
||||
cd ralph-claude-code
|
||||
./install.sh
|
||||
```
|
||||
|
||||
The install script sets required files and configuration automatically.
|
||||
|
||||
### Usage
|
||||
|
||||
Using enhanced Ralph takes two steps. First, initialize the project with `ralph-setup`:
|
||||
|
||||
```bash
|
||||
ralph-setup my-project
|
||||
```
|
||||
|
||||
This creates the required config files in the project. Then start the loop with `ralph loop`:
|
||||
|
||||
```bash
|
||||
ralph loop
|
||||
```
|
||||
|
||||
### Configuration file
|
||||
|
||||
Enhanced Ralph uses `.claude/ralph-config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"maxIterations": 50,
|
||||
"rateLimitPerHour": 100,
|
||||
"completionPromise": "TASK_COMPLETE",
|
||||
"exitSignal": "EXIT_NOW",
|
||||
"costAlertThresholds": [10, 50, 100]
|
||||
}
|
||||
```
|
||||
|
||||
`maxIterations` is max loop count. `rateLimitPerHour` is hourly rate cap. `completionPromise` is completion marker text. `exitSignal` is explicit exit signal. `costAlertThresholds` defines budget warning levels.
|
||||
|
||||
---
|
||||
|
||||
## Method 4: Agent Teams (Parallel Multi-Agent)
|
||||
|
||||
When tasks are large enough, a single Claude is not enough; you need "team collaboration."
|
||||
|
||||
Agent Teams is an advanced capability that lets multiple Claude instances run in parallel and coordinate through shared task lists and dependencies. This is suitable for very large projects. In Nicholas Carlini's experiment, 16 parallel agents produced 100,000+ lines of code in two weeks and built a C compiler capable of compiling the Linux kernel.
|
||||
|
||||
Agent Teams is more complex, and we will cover it in detail in the next section: "3.3 Agent Teams Multi-Agent Collaboration."
|
||||
|
||||
---
|
||||
|
||||
## Method 5: Background Tasks (Ctrl+B)
|
||||
|
||||
This is a simple and practical non-blocking execution method.
|
||||
|
||||
### Basic operation
|
||||
|
||||
Usage is straightforward. When Claude starts a task, press `Ctrl+B` to push it to background.
|
||||
|
||||
For example, you say: "Run full test suite." Claude begins running. You press `Ctrl+B`, and Claude replies: "Task pushed to background (ID: task_abc123)." Then you can continue: "Meanwhile, analyze this log file." Claude can analyze logs while tests continue in background.
|
||||
|
||||
### Viewing background tasks
|
||||
|
||||
There are several ways to check background tasks. Use `/tasks` to list all tasks with task ID, state, and start time. Press `Ctrl+T` for quick status summary. You can also bring a task back to foreground to inspect live output.
|
||||
|
||||
### Suitable scenarios
|
||||
|
||||
Background tasks suit several typical situations:
|
||||
|
||||
First, long-running tests. Full suites may take tens of minutes, and background mode avoids blocking.
|
||||
|
||||
Second, large project builds. Build pipelines can run while you continue other work.
|
||||
|
||||
Third, batch file operations such as mass rename and formatting.
|
||||
|
||||
Fourth, anything you do not want to wait for synchronously.
|
||||
|
||||
---
|
||||
|
||||
## Safety Mechanisms: Preventing Infinite Loops
|
||||
|
||||
Any automated loop system must include protections, otherwise it may run out of control.
|
||||
|
||||
### Hard limits
|
||||
|
||||
The most basic protection is setting `--max-iterations` (maximum loop count). This is mandatory. Regardless of completion state, the task stops at this cap, which prevents unlimited API spending.
|
||||
|
||||
You can also enforce time limits, for example auto-stop after 4 hours. You can also set budget alerts that pause and notify at spend thresholds (for example 10 USD, 50 USD, 100 USD).
|
||||
|
||||
### Intelligent detection
|
||||
|
||||
You can add smart dead-loop detection. For example, check whether recent commits include meaningful changes:
|
||||
|
||||
```bash
|
||||
if [ $(git diff HEAD~5 | wc -l) -eq 0 ]; then
|
||||
echo "No substantive changes in the last 5 commits, possible loop"
|
||||
exit 1
|
||||
fi
|
||||
```
|
||||
|
||||
If the diff against the state 5 commits ago is empty, the system may be stuck and should stop with an alert.
|
||||
|
||||
### Cost alerts
|
||||
|
||||
Set cost alert thresholds in config:
|
||||
|
||||
```json
|
||||
{
|
||||
"costAlertThresholds": [10, 50, 100],
|
||||
"alertAction": "pause_and_notify"
|
||||
}
|
||||
```
|
||||
|
||||
When spending reaches 10, 50, or 100 USD, the system pauses and notifies you so you can decide whether to continue.
|
||||
|
||||
### Manual checkpoints
|
||||
|
||||
For important tasks, add manual checkpoints:
|
||||
|
||||
```bash
|
||||
if [ $((iteration % 10)) -eq 0 ]; then
|
||||
read -p "Completed $iteration iterations. Continue? (y/n)" answer
|
||||
if [ "$answer" != "y" ]; then
|
||||
break
|
||||
fi
|
||||
fi
|
||||
```
|
||||
|
||||
This pauses every 10 iterations for confirmation, allowing timely human intervention.
|
||||
|
||||
---
|
||||
|
||||
## Practical Build: Complete BBS Forum with Ralph Loop
|
||||
|
||||
Let's use a full example to show the power of Ralph Loop. We will build a BBS-style forum system from scratch, including user auth, posting, a profile center, and an admin backend.
|
||||
|
||||
### Project objective
|
||||
|
||||
Build a fully functional BBS forum system with:
|
||||
|
||||
**User-side features:**
|
||||
- user registration, login, logout
|
||||
- browse post list (pagination)
|
||||
- view post detail
|
||||
- publish new posts
|
||||
- comment feature
|
||||
- profile center (view own posts, update profile)
|
||||
|
||||
**Admin backend features:**
|
||||
- admin login
|
||||
- user management (ban/unban)
|
||||
- post management (delete/pin)
|
||||
- comment management
|
||||
- system statistics
|
||||
|
||||
**Tech stack:**
|
||||
- backend: Node.js + Express + SQLite
|
||||
- frontend: React + React Router + Axios
|
||||
- auth: JWT token
|
||||
- styling: Tailwind CSS
|
||||
|
||||
### Preparation
|
||||
|
||||
First install Ralph Wiggum plugin:
|
||||
|
||||
```bash
|
||||
/plugin install ralph-wiggum@claude-code-plugins  # run inside Claude Code, after adding the marketplace as shown in Method 2
|
||||
```
|
||||
|
||||
### Start Ralph Loop
|
||||
|
||||
Now launch Ralph Loop to build the whole project:
|
||||
|
||||
```bash
|
||||
/ralph-wiggum:ralph-loop "
|
||||
Please build a complete BBS forum system from scratch using TDD.
|
||||
|
||||
Project structure requirements:
|
||||
- backend/ directory: Express API server
|
||||
- frontend/ directory: React frontend app
|
||||
- both directories have their own tests
|
||||
|
||||
Backend requirements:
|
||||
- use Express framework
|
||||
- SQLite storage (better-sqlite3)
|
||||
- JWT auth (jsonwebtoken + bcrypt)
|
||||
- user table: id, username, password, email, role, createdAt
|
||||
- post table: id, title, content, authorId, category, pinned, createdAt
|
||||
- comment table: id, content, postId, authorId, createdAt
|
||||
|
||||
Backend API endpoints:
|
||||
- POST /api/auth/register - user register
|
||||
- POST /api/auth/login - user login
|
||||
- GET /api/posts - get post list (pagination + category filter)
|
||||
- GET /api/posts/:id - get post detail
|
||||
- POST /api/posts - create post (auth required)
|
||||
- PUT /api/posts/:id - edit post (author or admin)
|
||||
- DELETE /api/posts/:id - delete post (author or admin)
|
||||
- POST /api/posts/:id/comments - add comment (auth required)
|
||||
- GET /api/user/profile - get profile (auth required)
|
||||
- PUT /api/user/profile - update profile (auth required)
|
||||
- GET /api/admin/stats - admin statistics (admin only)
|
||||
- GET /api/admin/users - user list (admin only)
|
||||
- PUT /api/admin/users/:id/ban - ban user (admin only)
|
||||
|
||||
Frontend page requirements:
|
||||
- /login - login page
|
||||
- /register - register page
|
||||
- / - home page (post list)
|
||||
- /post/:id - post detail
|
||||
- /new - publish post
|
||||
- /profile - profile center
|
||||
- /admin - admin panel (admin permission required)
|
||||
|
||||
Admin panel features:
|
||||
- user management (view, ban, unban)
|
||||
- post management (view, delete, pin)
|
||||
- comment management (view, delete)
|
||||
- system statistics (user count, post count, comment count)
|
||||
|
||||
TDD requirements:
|
||||
- write tests first, then implementation
|
||||
- each feature must have corresponding tests
|
||||
- backend uses Jest, API tests cover all endpoints
|
||||
- frontend uses Vitest, component tests cover major features
|
||||
- auth middleware must have tests
|
||||
|
||||
Acceptance criteria:
|
||||
- npm test (backend) passes
|
||||
- npm test (frontend) passes
|
||||
- frontend starts and works correctly
|
||||
- backend API responds correctly
|
||||
- proper permission isolation between normal users and admin
|
||||
- code passes ESLint checks
|
||||
|
||||
Output after completion: <promise>BBS_SYSTEM_COMPLETE</promise>
|
||||
" --max-iterations 150 --completion-promise "BBS_SYSTEM_COMPLETE"
|
||||
```
|
||||
|
||||
### Expected time
|
||||
|
||||
Based on complexity:
|
||||
|
||||
**If coded manually**: about 40-60 hours (including schema design, auth system, frontend/backend integration, and testing)
|
||||
|
||||
**Using Ralph Loop**:
|
||||
- base version (core features): around 3-5 hours
|
||||
- full version (admin backend + tests): around 6-10 hours
|
||||
|
||||
### Monitoring progress
|
||||
|
||||
While Ralph Loop is running, you can monitor progress in several ways:
|
||||
|
||||
**Iteration count**: Ralph shows current and max iterations, which helps estimate remaining time.
|
||||
|
||||
**Logs**: you can see what Claude is doing now, such as designing schema, writing APIs, building components, and fixing bugs.
|
||||
|
||||
**Test status**: the result of every test run is shown. Over time, passing tests increase and failing tests decrease. When failures begin to drop steadily, the project is approaching completion.
|
||||
|
||||
### Post-completion verification
|
||||
|
||||
After Ralph outputs the completion marker, perform manual verification:
|
||||
|
||||
```bash
|
||||
# backend tests
|
||||
cd backend
|
||||
npm test
|
||||
|
||||
# frontend tests
|
||||
cd ../frontend
|
||||
npm test
|
||||
|
||||
# start backend
|
||||
cd ../backend
|
||||
npm start
|
||||
|
||||
# start frontend (in another terminal)
|
||||
cd frontend
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Open browser and test:
|
||||
|
||||
1. register a new user
|
||||
2. login
|
||||
3. browse posts
|
||||
4. publish new post
|
||||
5. add comment
|
||||
6. open profile center
|
||||
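7. logout and login as admin (for example a seeded account such as admin/admin123; the prompt above does not define one, so add it to the prompt or check the generated seed data)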
|
||||
8. test admin backend features
|
||||
|
||||
### Notes
|
||||
|
||||
Ralph Loop is powerful, but keep these points in mind:
|
||||
|
||||
**First, more detailed prompts produce better results.** Ambiguous prompts require more iterations for correction.
|
||||
|
||||
**Second, set reasonable iteration caps.** BBS systems are complex; recommend at least 100 iterations.
|
||||
|
||||
**Third, TDD is recommended.** Writing tests first can significantly reduce debugging time.
|
||||
|
||||
**Fourth, final manual verification is required.** AI may miss edge cases or special scenarios, especially in security-sensitive paths.
|
||||
|
||||
**Fifth, pay close attention to schema design.** Ralph may need several iterations before landing on a robust schema.
|
||||
|
||||
---
|
||||
|
||||
## Method Comparison and Selection
|
||||
|
||||
Each method has its own characteristics and fits different scenarios.
|
||||
|
||||
While True Loop is the simplest: only 5 lines to run, good for quick experiments and prototypes. But it is limited and does not detect real completion, relying only on iteration caps.
|
||||
|
||||
Ralph Wiggum is the general recommendation for most scenarios. It has a complete Stop Hook mechanism, supports completion-marker checks, has official support, and solid docs.
|
||||
|
||||
Enhanced Ralph is better for production environments, with dual exit conditions, rate limits, and smart circuit breakers.
|
||||
|
||||
Background tasks are useful for simple non-blocking execution: just press `Ctrl+B`. But it is only background execution, not iterative loop orchestration.
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
The core idea for making Claude Code work long-term is simple: do not ask it to "finish in one shot"; ask it to "keep trying until the task is truly complete."
|
||||
|
||||
All methods are fundamentally doing the same thing: give Claude a task, let it run, check whether completion is real, and if not, continue the next round.
|
||||
|
||||
Which method to choose depends on your needs.
|
||||
|
||||
If you want something simple and fast, use While True Loop. Five lines are enough to run it, but its features are limited.
|
||||
|
||||
If you want the general-purpose recommendation, use Ralph Wiggum. It has official support and complete capabilities, and it is suitable for most cases.
|
||||
|
||||
If this is production usage, use enhanced Ralph. It has extra safety mechanisms and is more reliable.
|
||||
|
||||
(For Agent Teams multi-agent collaboration, see the next section: "3.3 Agent Teams Multi-Agent Collaboration.")
|
||||
|
||||
Hopefully this chapter helps you use Claude Code more effectively so AI becomes a true productivity tool rather than only a chatbot.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Official Resources
|
||||
|
||||
- [Claude Code Official Docs](https://docs.anthropic.com/en/docs/claude-code) - complete official Claude Code documentation
|
||||
- [Ralph Wiggum Plugin README](https://github.com/anthropics/claude-plugins-official/tree/main/plugins/ralph-wiggum) - official plugin documentation
|
||||
- [Claude Code Hooks](https://docs.anthropic.com/en/docs/claude-code/configuration/hooks) - official Hooks system docs
|
||||
|
||||
### Community Projects
|
||||
|
||||
- [frankbria/ralph-claude-code](https://github.com/frankbria/ralph-claude-code) (2.1k stars) - enhanced Ralph implementation with additional safeguards
|
||||
- [Awesome Ralph](https://github.com/snwfdhmp/awesome-ralph) - curated Ralph resources and examples
|
||||
- [Ralph Ryan](https://github.com/wquguru/ralph-ryan) - PRD generation + Ralph loop integration
|
||||
- [snarktank/ralph](https://github.com/snarktank/ralph) - original Ralph implementation
|
||||
|
||||
### Articles and Tutorials
|
||||
|
||||
**English resources**
|
||||
|
||||
- [Geoffrey Huntley - Ralph Technique](https://ghuntley.com/ralph/) - original Ralph concept by creator
|
||||
- [Effective Framework Practices for Reliable Long-Running AI Agents](https://m.blog.csdn.net/weixin_48708052/article/details/158044721) - deep read of Anthropic engineering blog
|
||||
- [Complete Claude Code Guide](https://developer.aliyun.com/article/1705912) - full usage guide
|
||||
|
||||
**Chinese tutorials**
|
||||
|
||||
- [Beginner-Friendly Tutorial - CSDN](https://m.blog.csdn.net/zsr154278963/article/details/156637281) - detailed install and usage guide
|
||||
- [Deep Analysis - Toutiao](https://m.toutiao.com/a7585579989207188006/) - mechanism and core principles
|
||||
- [Full-Stack Plain-Language Guide](https://www.jdon.com/90167-ralph-wigum-loop-explained-for-teens.html) - complete walkthrough from principles to practice
|
||||
- [Beginner and Practical Guide - CNBlogs](https://www.cnblogs.com/buwai/p/19625356) - basics and practical examples
|
||||
- [Ralph Loop Deep Dive - CSDN](https://m.blog.csdn.net/roamingcode/article/details/156732443) - Stop Hook mechanism details
|
||||
- [Claude Code Perpetual Engine - CSDN](https://m.blog.csdn.net/qq_44866828/article/details/156736656) - infinite-loop iteration plugin deep dive
|
||||
- [Ralph Loop New User Starter - CNBlogs](https://www.cnblogs.com/gyc567/p/19495639) - best practices and prompt summary
|
||||
|
||||
### Practical Case Studies
|
||||
|
||||
- [CURSED Programming Language](https://github.com/geoffreyhuntley/cursed) - complete programming language built with Ralph over 3 months
|
||||
- [Boris Cherny's 30 Days](https://twitter.com/boriskirov/status/1756002385683786616) - 259 PRs case share
|
||||
- [Y Combinator Hackathon](https://github.com/geoffreyhuntley/ralph) - 6-project overnight generation case
|
||||
- [Geoffrey Huntley's Blog](https://ghuntley.com/) - creator's technical blog
|
||||
@@ -0,0 +1,571 @@
|
||||
# Claude Code MCP Complete Guide
|
||||
|
||||
## What is Claude Code MCP?
|
||||
|
||||
**Claude Code** is Anthropic's official AI command-line tool, while **MCP (Model Context Protocol)** is the protocol that allows Claude Code to connect to external tools and services.
|
||||
|
||||
Put simply, MCP turns Claude Code from an AI assistant that can only read and write local files into a super assistant that can access GitHub, databases, APIs, and cloud services.
|
||||
|
||||
## Why use MCP in Claude Code?
|
||||
|
||||
### Claude Code without MCP
|
||||
|
||||
```text
|
||||
What you can do:
|
||||
✓ Read local files
|
||||
✓ Edit code
|
||||
✓ Run commands
|
||||
✓ Use Bash tools
|
||||
|
||||
What you cannot do:
|
||||
✗ View your GitHub Issues
|
||||
✗ Access a cloud database
|
||||
✗ Call external APIs
|
||||
✗ Get real-time weather
|
||||
```
|
||||
|
||||
### Claude Code with MCP
|
||||
|
||||
```text
|
||||
What you can do:
|
||||
✓ All original functions
|
||||
✓ View / create GitHub Issues and PRs
|
||||
✓ Query SQLite and PostgreSQL databases
|
||||
✓ Access external services such as Notion and Slack
|
||||
✓ Get real-time weather and map data
|
||||
✓ Browser automation
|
||||
✓ ...and more
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Step 1: Understand where the config files live
|
||||
|
||||
Claude Code's MCP configuration files are located at:
|
||||
|
||||
| Level | Config file path | Scope |
|
||||
|-----|-------------|----------|
|
||||
| **User level** | `~/.claude.json` | All projects |
|
||||
| **Project level** | `.claude/mcp.json` | Current project |
|
||||
|
||||
It is recommended to use **project-level config** first, so different projects can use different MCP services.
|
||||
|
||||
### Step 2: Add MCP servers with natural language
|
||||
|
||||
In Claude Code, you do not need to manually edit configuration files or memorize commands. You can describe what you want in natural language:
|
||||
|
||||
```text
|
||||
You: Help me add a GitHub MCP server. My token is ghp_xxx
|
||||
|
||||
Claude: I'll help you configure the GitHub MCP server...
|
||||
|
||||
[Automatically updates .claude/mcp.json]
|
||||
```
|
||||
|
||||
```text
|
||||
You: Add a SQLite database server. The database file is at ./data/app.db
|
||||
|
||||
Claude: Okay, I'll configure the SQLite MCP server...
|
||||
```
|
||||
|
||||
```text
|
||||
You: Add an HTTP-type MCP server with the address https://api.example.com/mcp
|
||||
|
||||
Claude: I'll add that remote MCP server...
|
||||
```
|
||||
|
||||
### Step 3: Verify the configuration
|
||||
|
||||
Ask Claude Code directly:
|
||||
|
||||
```text
|
||||
You: What MCP servers are available now?
|
||||
|
||||
Claude: Currently configured MCP servers:
|
||||
• github - GitHub integration
|
||||
• sqlite - SQLite database
|
||||
• filesystem - Filesystem access
|
||||
```
|
||||
|
||||
Or use the diagnostic command:
|
||||
|
||||
```text
|
||||
/doctor
|
||||
```
|
||||
|
||||
### Step 4: Start using it
|
||||
|
||||
Once configuration succeeds, you can call MCP functions directly with natural language:
|
||||
|
||||
```text
|
||||
You: Help me create an Issue on GitHub
|
||||
|
||||
Claude: I can help you create a GitHub Issue. Please tell me:
|
||||
- the repository address, for example owner/repo
|
||||
- the Issue title
|
||||
- the Issue description
|
||||
```
|
||||
|
||||
## Natural-language management in Claude Code
|
||||
|
||||
### View and manage MCP servers
|
||||
|
||||
You can interact with Claude Code entirely in natural language:
|
||||
|
||||
```text
|
||||
You: List all configured MCP servers
|
||||
|
||||
You: Check the connection status of the MCP servers
|
||||
|
||||
You: Delete the MCP server named notion
|
||||
|
||||
You: Update the token for the github server
|
||||
```
|
||||
|
||||
### Diagnose problems
|
||||
|
||||
When you run into problems:
|
||||
|
||||
```text
|
||||
You: Check what's wrong with the MCP connection
|
||||
|
||||
Claude: [will automatically run diagnostics, analyze configuration files, and check server status]
|
||||
```
|
||||
|
||||
## Configuration methods in detail
|
||||
|
||||
### User-level config, global
|
||||
|
||||
Edit `~/.claude.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/Users/yourname/Documents"]
|
||||
},
|
||||
"github": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-github"],
|
||||
"env": {
|
||||
"GITHUB_PERSONAL_ACCESS_TOKEN": "your-token"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Project-level config, recommended
|
||||
|
||||
Edit `.claude/mcp.json` in the project root:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"project-db": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-sqlite", "--db-path", "./data/app.db"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Advantages of project-level config:**
|
||||
|
||||
- Team members can share the config by committing it to Git
|
||||
- Different projects can use different MCP services
|
||||
- Configuration is more flexible and does not pollute global settings
|
||||
|
||||
### Transport modes
|
||||
|
||||
Claude Code supports three transport modes:
|
||||
|
||||
#### STDIO, local process
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"local-tool": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/path"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### HTTP, remote service
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"remote-api": {
|
||||
"url": "https://api.example.com/mcp",
|
||||
"transport": "http",
|
||||
"headers": {
|
||||
"Authorization": "Bearer your-token"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### SSE, server-sent events
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"streaming": {
|
||||
"url": "https://api.example.com/sse",
|
||||
"transport": "sse"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Practical examples
|
||||
|
||||
### Example 1: GitHub workflow automation
|
||||
|
||||
```text
|
||||
You: Help me push the current changes to GitHub, then create a PR titled "Add new feature"
|
||||
|
||||
Claude:
|
||||
1. Check current git status...
|
||||
2. Create a new branch feature/new-feature...
|
||||
3. Commit changes...
|
||||
4. Push to remote...
|
||||
5. Call github_create_pull_request to create the PR...
|
||||
6. PR created: https://github.com/owner/repo/pull/123
|
||||
```
|
||||
|
||||
### Example 2: Database query
|
||||
|
||||
```text
|
||||
You: Query the 10 most recently registered users in the database
|
||||
|
||||
Claude:
|
||||
1. Connect to the SQLite database...
|
||||
2. Execute query: SELECT * FROM users ORDER BY created_at DESC LIMIT 10
|
||||
3. Return results:
|
||||
- Zhang San (zhang@example.com) - 2025-02-28
|
||||
- Li Si (li@example.com) - 2025-02-27
|
||||
...
|
||||
```
|
||||
|
||||
### Example 3: Webpage screenshot analysis
|
||||
|
||||
```text
|
||||
You: Open https://example.com, take a screenshot, and analyze the page design
|
||||
|
||||
Claude:
|
||||
1. Launch browser...
|
||||
2. Navigate to https://example.com...
|
||||
3. Capture screenshot...
|
||||
4. [Returns screenshot]
|
||||
5. Page design analysis:
|
||||
- The layout uses a centered design with a max width of 1200px
|
||||
- The main color is blue (#3b82f6)
|
||||
- The navigation bar is fixed to the top
|
||||
...
|
||||
```
|
||||
|
||||
### Example 4: Integration with Notion
|
||||
|
||||
```text
|
||||
You: Save the notes I just wrote to Notion
|
||||
|
||||
Claude:
|
||||
1. Use the Notion MCP server...
|
||||
2. Create a new page...
|
||||
3. Saved: https://notion.so/page/xxx
|
||||
```
|
||||
|
||||
## Debugging tips
|
||||
|
||||
### Diagnose with natural language
|
||||
|
||||
When something goes wrong, just tell Claude Code directly:
|
||||
|
||||
```text
|
||||
You: My MCP server cannot connect. Please check it for me
|
||||
|
||||
You: The GitHub MCP tool call failed. What is the reason?
|
||||
|
||||
You: Why does the sqlite server always show "connecting"?
|
||||
```
|
||||
|
||||
Claude Code will automatically:
|
||||
|
||||
1. Check the configuration file format
|
||||
2. Validate environment variables
|
||||
3. Test the server connection
|
||||
4. Provide concrete fix suggestions
|
||||
|
||||
### Common problem troubleshooting
|
||||
|
||||
| Problem | Possible cause | Solution |
|
||||
|-----|---------|----------|
|
||||
| Server not connected | Config file format error | Check JSON syntax |
|
||||
| Tool cannot be called | Insufficient permissions | Check environment variables |
|
||||
| Connection timeout | Network problem | Check URL or network |
|
||||
| Process crashes | Bug in server code | Check server logs |
|
||||
|
||||
### Manual diagnostic command
|
||||
|
||||
```text
|
||||
/doctor
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```text
|
||||
System Diagnostic Report:
|
||||
===============
|
||||
|
||||
Claude Code: v2.5.0 ✓
|
||||
Node.js: v20.0.0 ✓
|
||||
|
||||
MCP server status:
|
||||
• github: ✓ Connected (12 tools)
|
||||
• sqlite: ✗ Connection failed - Database file not found
|
||||
• puppeteer: ✓ Connected (8 tools)
|
||||
|
||||
Suggestions:
|
||||
1. Check whether the sqlite database path is correct
|
||||
2. Make sure the .claude/mcp.json format is correct
|
||||
```
|
||||
|
||||
## Best practices
|
||||
|
||||
### 1. Prefer project-level configuration
|
||||
|
||||
**Why recommend project-level configuration?**
|
||||
|
||||
Different projects often need different MCP services. For example, a frontend project may need browser testing tools, while a backend project may need database connections. With project-level configuration, each project can have its own dedicated set of MCP servers, avoiding the chaos of one large global config.
|
||||
|
||||
More importantly, project-level config can be committed to Git. After team members clone the project, they can directly use the same MCP services without reconfiguring everything.
|
||||
|
||||
```text
|
||||
Project A, frontend project -> .claude/mcp.json contains browser testing MCP
|
||||
Project B, backend project -> .claude/mcp.json contains database MCP
|
||||
```
|
||||
|
||||
### 2. Store sensitive information in environment variables
|
||||
|
||||
**Never hard-code secrets in the configuration file.**
|
||||
|
||||
Configuration files may be accidentally committed to Git and leak keys. The correct approach is to store sensitive values in environment variables and only reference the variable names from the config file. That way, even if the config file becomes public, the real secrets are still hidden.
|
||||
|
||||
```json
|
||||
{
|
||||
"env": {
|
||||
"GITHUB_TOKEN": "$GITHUB_TOKEN",
|
||||
"GITHUB_TOKEN": "ghp_abc123"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
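The two `GITHUB_TOKEN` entries above are alternatives shown side by side for comparison, not a config to copy as-is (a real JSON object should not repeat a key). The first form is good because it reads the value from an environment variable. The second form is bad because it hard-codes the secret directly in the file.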
|
||||
|
||||
### 3. Pin versions
|
||||
|
||||
**Why do you need to pin versions?**
|
||||
|
||||
By default, `npx -y` will always use the latest version of an MCP server. This can cause problems: a new version may introduce breaking changes, or a package may suddenly be removed or renamed.
|
||||
|
||||
By appending `@version` to the package name, you ensure that a validated version is always used, reducing surprises caused by automatic upgrades.
|
||||
|
||||
```json
|
||||
{
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-github@1.2.3"]
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Document your MCP configuration
|
||||
|
||||
**Help teammates understand the MCP setup quickly**
|
||||
|
||||
When a project includes multiple MCP servers, new team members may not understand what each server is for or what configuration it requires. Creating a `README.md` under the `.claude/` directory that explains each server's purpose, required config, and how to obtain credentials can significantly reduce communication cost.
|
||||
|
||||
Create `.claude/README.md` in your project:
|
||||
|
||||
```markdown
|
||||
# MCP Configuration Notes
|
||||
|
||||
MCP servers used in this project:
|
||||
|
||||
## github
|
||||
Used for GitHub automation. Requires GITHUB_TOKEN.
|
||||
|
||||
## sqlite
|
||||
Connects to ./data/app.db for querying and modifying data.
|
||||
|
||||
## puppeteer
|
||||
Used for E2E testing.
|
||||
```
|
||||
|
||||
## Claude Code vs Claude Desktop
|
||||
|
||||
| Feature | Claude Code | Claude Desktop |
|
||||
|-----|-------------|----------------|
|
||||
| **Config file** | `~/.claude.json` or `.claude/mcp.json` | `claude_desktop_config.json` |
|
||||
| **Project-level config** | ✓ Supported | ✗ Not supported |
|
||||
| **Natural-language management** | ✓ Supported | ✗ Manual editing required |
|
||||
| **Diagnostics** | ✓ `/doctor` | ✗ None |
|
||||
| **Hot reload** | ✓ Automatic | ✗ Requires app restart |
|
||||
| **Use cases** | Development workflow, CI/CD | Daily use, office tasks |
|
||||
|
||||
## Common MCP servers
|
||||
|
||||
> 💡 For the complete MCP server list, please refer to the appendix: [MCP Server Directory](/en/appendix/mcp-servers/)
|
||||
|
||||
### GitHub server
|
||||
|
||||
**Function:** Issues, PRs, repository management
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-github"],
|
||||
"env": {
|
||||
"GITHUB_PERSONAL_ACCESS_TOKEN": "your-token"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Get a token from:** https://github.com/settings/tokens
|
||||
|
||||
### SQLite server
|
||||
|
||||
**Function:** Query and manage SQLite databases
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"sqlite": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-sqlite", "--db-path", "./data/database.db"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Filesystem server
|
||||
|
||||
**Function:** Access files inside a specified directory
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/Users/yourname/Documents"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Puppeteer browser automation
|
||||
|
||||
**Function:** Browser control, screenshots, automated testing
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"puppeteer": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-puppeteer"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Brave search server
|
||||
|
||||
**Function:** Web search
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"brave-search": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-brave-search"],
|
||||
"env": {
|
||||
"BRAVE_API_KEY": "your-brave-api-key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Reference resources
|
||||
|
||||
### Official documentation
|
||||
|
||||
- [Claude Code official documentation - MCP](https://docs.anthropic.com/en/docs/claude-code/mcp)
|
||||
- [MCP official website](https://modelcontextprotocol.io/)
|
||||
- [MCP specification documentation](https://modelcontextprotocol.io/specification/)
|
||||
- [MCP GitHub repository](https://github.com/modelcontextprotocol)
|
||||
|
||||
### Official servers
|
||||
|
||||
- [@modelcontextprotocol/server-github](https://github.com/modelcontextprotocol/servers/tree/main/src/github) - GitHub integration
|
||||
- [@modelcontextprotocol/server-sqlite](https://github.com/modelcontextprotocol/servers/tree/main/src/sqlite) - SQLite database
|
||||
- [@modelcontextprotocol/server-postgres](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres) - PostgreSQL database
|
||||
- [@modelcontextprotocol/server-filesystem](https://github.com/modelcontextprotocol/servers/tree/main/src/filesystem) - Filesystem access
|
||||
- [@modelcontextprotocol/server-puppeteer](https://github.com/modelcontextprotocol/servers/tree/main/src/puppeteer) - Browser automation
|
||||
- [@modelcontextprotocol/server-fetch](https://github.com/modelcontextprotocol/servers/tree/main/src/fetch) - Web fetching
|
||||
- [@modelcontextprotocol/server-brave-search](https://github.com/modelcontextprotocol/servers/tree/main/src/brave-search) - Brave search
|
||||
- [@modelcontextprotocol/server-git](https://github.com/modelcontextprotocol/servers/tree/main/src/git) - Git operations
|
||||
|
||||
### Tutorial articles
|
||||
|
||||
- [A thorough explanation of MCP principles and practice](https://view.inews.qq.com/a/20250414A023WV00)
|
||||
- [MCP (Model Context Protocol) architecture and how it works](https://m.toutiao.com/w/1826385835060307/)
|
||||
- [2025 latest large-model tutorial: from getting started to mastering the MCP protocol](https://m.blog.csdn.net/weixin_45653328/article/details/150916706)
|
||||
- [Learn MCP from scratch (8) - build an MCP server](https://juejin.cn/post/7582510291667419187)
|
||||
|
||||
### Configuration guides
|
||||
|
||||
- [Claude Code best practices](https://www.anthropic.com/engineering/claude-code-best-practices)
|
||||
- [Claude Code complete configuration guide](https://juejin.cn/post/7576838552472043563)
|
||||
|
||||
### Development tutorials
|
||||
|
||||
- [Beginner-friendly MCP server practical guide in both TypeScript and Python](https://m.blog.csdn.net/ztt123654/article/details/150844207)
|
||||
- [Ultimate MCP server building guide: complete TypeScript and Python tutorials](https://m.blog.csdn.net/gitblog_00703/article/details/154862128)
|
||||
- [Build the simplest MCP server with TypeScript](https://m.blog.csdn.net/weixin_45653525/article/details/148433757)
|
||||
- [Generate a TypeScript MCP server using Azure container applications](https://learn.microsoft.com/zh-cn/azure/developer/ai/build-mcp-server-ts)
|
||||
|
||||
### MCP server resources
|
||||
|
||||
- [Awesome MCP Servers](https://github.com/punkpeye/awesome-mcp-servers) - the most comprehensive MCP server list
|
||||
- [Official MCP Registry](https://registry.modelcontextprotocol.io) - Anthropic's official app store
|
||||
- [MCP.so](https://mcp.so) - community MCP server center
|
||||
- [Glama.ai MCP](https://glama.ai/mcp/servers) - MCP directory with ratings and comments
|
||||
- [Smithery](https://smithery.ai) - MCP server marketplace
|
||||
- [MCPHub](https://mcphub.io/registry) - clean interface directory
|
||||
- [LobeHub MCP](https://lobehub.com/zh/mcp) - Chinese MCP directory
|
||||
|
||||
### Map and weather services
|
||||
|
||||
- [Amap MCP Server](https://lobehub.com/zh/mcp/luozengchang-mcp-amap)
|
||||
- [Tencent Location Service MCP documentation](https://lbs.qq.com/service/MCPServer/MCPServerGuide/overview)
|
||||
- [Caiyun Weather MCP Server](https://github.com/caiyunapp/mcp-caiyun-weather)
|
||||
- [OpenWeatherMap MCP Server](https://github.com/CodeByWaqas/weather-mcp-server)
|
||||
|
||||
### Community resources
|
||||
|
||||
- [Everything Claude Code Config](https://github.com/affaan-m/everything-claude-code) - production-grade Claude Code configuration collection
|
||||
- [AI Coding Guide](https://github.com/hacket/AICodingGuide) - Chinese learning path for Claude Code
|
||||
|
||||
### Real-world application cases
|
||||
|
||||
- [BlenderMCP - AI-driven 3D modeling](https://github.com/Belthur/blender-mcp) - 4,100+ ⭐
|
||||
- [15 best practices for MCP in production](https://learn.microsoft.com/zh-cn/azure/azure-functions/scenario-mcp-apps)
|
||||
@@ -0,0 +1,935 @@
|
||||
# Claude Code Remote Development on Mobile
|
||||
|
||||
## Introduction
|
||||
|
||||
Imagine these scenarios: you suddenly think of a brilliant bug-fix idea on the subway during your commute; you receive an urgent production incident alert while waiting in line at a cafe; you want to check how your AI-built project is progressing while accompanying your girlfriend shopping.
|
||||
|
||||
In traditional development workflows, these scenarios usually mean you need to find a place to open your laptop, or helplessly postpone the work. But in the AI-assisted coding era, the rules have changed. Claude Code makes it possible to carry your development environment in your pocket and stay productive anytime, anywhere.
|
||||
|
||||
In the summer of 2025, as Claude Code adoption grew, developers started exploring different "coding on phone" approaches. From simple local Termux usage, to complex SSH + Tailscale remote connections, to dedicated Happy Coder apps, a full mobile development ecosystem gradually took shape.
|
||||
|
||||
The core problem this chapter solves is: how to make Claude Code follow your phone and become a true "pocket development assistant."
|
||||
|
||||
---
|
||||
|
||||
::: info Community Feedback at a Glance
|
||||
|
||||
Based on real-world community feedback, the experience of each approach compares as follows:
|
||||
|
||||
**Happy Coder (Approach 2)**
|
||||
- Connection stability issues: disconnections happen often, and context is lost after disconnects
|
||||
- Limited functionality: cannot use `/` commands
|
||||
- Security concerns: depends on official relay servers, and some users are concerned about data security
|
||||
|
||||
**HAPI (Approach 3)**
|
||||
- Supports self-hosted servers: can be deployed on your own VPS
|
||||
- Better experience when paired with Tailscale: run `hapi server` on your computer and connect from your phone through the Tailscale IP
|
||||
- Relatively stable connection, suitable for long-term use
|
||||
|
||||
**Claude Remote Control (Official Approach)**
|
||||
- Official solution, natively integrated with Claude Code
|
||||
- Supports full access to local environments (MCP, tools, project configuration)
|
||||
- Requires Max subscription (Pro support is coming soon)
|
||||
- Relies on Anthropic cloud connectivity
|
||||
|
||||
**Recommendation**: if you require high connection stability, or are concerned about third-party relay security, choose **HAPI + Tailscale** or the **official Remote Control** approach.
|
||||
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## Core Principle: Mobile Development Architecture Patterns
|
||||
|
||||
Before introducing specific approaches, first understand the essence of the problem.
|
||||
|
||||
### Why is mobile development a problem?
|
||||
|
||||
Traditional IDEs (such as VS Code and IntelliJ) require a full operating system environment, strong CPU, large memory, and storage space. Although phones are increasingly powerful, they still have natural limits for development experience:
|
||||
|
||||
**Input constraints**: virtual keyboards are inefficient for coding, and complex syntax is easy to mistype
|
||||
|
||||
**Screen constraints**: small screens make it hard to view code, terminal, and browser at the same time
|
||||
|
||||
**Environment constraints**: phones cannot run full development toolchains (compilers, databases, debuggers)
|
||||
|
||||
**Connection constraints**: mobile networks are unstable, and SSH sessions disconnect easily
|
||||
|
||||
### Core idea: thin-client architecture
|
||||
|
||||
The core idea behind all mobile development approaches is the same: the phone is only the "control console"; real development work is done elsewhere.
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Phone │ │ Host/Cloud │ │
|
||||
│ │ (Controller)│ ────────► │ (Executor) │ │
|
||||
│ │ │ Commands │ │ │
|
||||
│ │ • Send cmds │ │ • Run CLI │ │
|
||||
│ │ • View out │ │ • Exec code │ │
|
||||
│ │ • Review │ │ • Access fs │ │
|
||||
│ └─────────────┘ └─────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
This architecture allows the phone to focus only on human-computer interaction, while heavy computation is delegated to your host or cloud.
|
||||
|
||||
---
|
||||
|
||||
## Approach 1: Official iOS App
|
||||
|
||||
In October 2025, Anthropic officially launched Claude Code mobile support in the iOS app. This is the simplest mobile development option.
|
||||
|
||||
### Regional limitations
|
||||
|
||||
Important note: the Claude app **cannot be used directly** in mainland China.
|
||||
|
||||
If you are in mainland China, it is recommended to use **Happy Coder** directly (Approach 2), which can work normally through configured domestic API relay services.
|
||||
|
||||
If you have an overseas Apple ID, you can switch regions and download the Claude app.
|
||||
|
||||
### How it works
|
||||
|
||||
```text
|
||||
┌─────────────┐ ┌─────────────────┐
|
||||
│ iOS App │ ──────────────────► │ Anthropic Cloud │
|
||||
│ (Phone) │ HTTPS + OAuth │ Claude Code │
|
||||
└─────────────┘ └────────┬────────┘
|
||||
│
|
||||
▼
|
||||
┌───────────────┐
|
||||
│ GitHub API │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
Your phone app only sends commands. All code execution runs in Anthropic's cloud sandbox, and results are synced through GitHub.
|
||||
|
||||
### Basic usage
|
||||
|
||||
**Prerequisites:**
|
||||
|
||||
- iPhone with iOS 15 or later
|
||||
- Claude Pro/Team/Enterprise subscription (free plan is not supported)
|
||||
- GitHub account
|
||||
|
||||
**Steps:**
|
||||
|
||||
1. Download the Claude app from the App Store
|
||||
2. Log in to your Anthropic account
|
||||
3. Find the "Code" tab in the app
|
||||
4. Connect your GitHub repository through OAuth
|
||||
5. Start creating tasks
|
||||
|
||||
### Pros and cons
|
||||
|
||||
The pros are a zero setup barrier, a smooth experience, and push notifications. The cons are that it supports iOS only, is built primarily around a GitHub workflow, has relatively limited capability (it cannot access local file systems), and is not directly available in mainland China.
|
||||
|
||||
---
|
||||
|
||||
## Approach 2: Happy Coder
|
||||
|
||||
Happy Coder is an open-source mobile and web client designed for Claude Code and Codex, with end-to-end encryption and remote control of your AI coding assistant from anywhere.
|
||||
|
||||
### How it works
|
||||
|
||||
```text
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Happy App │ ────────► │ Happy Server │ ◄──────── │happy-coder │
|
||||
│ (Phone/Web) │ Encrypted WS │ (Relay) │ WebSocket │ (Desktop) │
|
||||
└─────────────┘ └─────────────┘ └──────┬──────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────┐
|
||||
│Claude Code │
|
||||
│ CLI │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
On your computer, run `happy` instead of `claude` to launch your AI coding assistant. When you need phone control, the session automatically switches to remote mode. Press any key on your computer to switch back to local control.
|
||||
|
||||
### Installation and usage
|
||||
|
||||
**Step 1: download app**
|
||||
|
||||
| Platform | Link |
|
||||
|------|------|
|
||||
| iOS | [App Store](https://apps.apple.com/us/app/happy-claude-code-client/id6748571505) |
|
||||
| Android | [Google Play](https://play.google.com/store/apps/details?id=com.ex3ndr.happy) |
|
||||
| Web | [app.happy.engineering](https://app.happy.engineering) |
|
||||
|
||||
**Step 2: install CLI on computer**
|
||||
|
||||
```bash
|
||||
npm install -g happy-coder
|
||||
```
|
||||
|
||||
**Step 3: launch and pair**
|
||||
|
||||
```bash
|
||||
# run in your project directory
|
||||
cd ~/my-project
|
||||
happy
|
||||
|
||||
# a pairing QR code will be shown
|
||||
```
|
||||
|
||||
**Step 4: scan and pair on phone**
|
||||
|
||||
Open Happy app and scan the QR code shown on your computer. After pairing succeeds, you can control Claude Code from your phone.
|
||||
|
||||
**Step 5: use**
|
||||
|
||||
```bash
|
||||
# launch Claude Code
|
||||
happy
|
||||
|
||||
# or launch Codex
|
||||
happy codex
|
||||
```
|
||||
|
||||
### Resource links
|
||||
|
||||
- [GitHub Project](https://github.com/slopus/happy) - source code
|
||||
- [Documentation](https://happy.engineering/docs) - usage docs
|
||||
- [Discord Community](https://discord.gg/fX9WBAhyfD) - community discussion
|
||||
|
||||
### Pros and cons
|
||||
|
||||
Pros are simple setup, cross-platform support, end-to-end encryption, and open-source auditability. Cons are dependence on third-party relay infrastructure and the need to verify mobile app availability in your own environment.
|
||||
|
||||
---
|
||||
|
||||
## Approach 3: HAPI
|
||||
|
||||
HAPI is an alternative to Happy Coder, with a local-first design, seamless switching between devices, and support for multiple AI models.
|
||||
|
||||
### How it works
|
||||
|
||||
```text
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ HAPI App │ ────────► │ HAPI Server │ ◄──────── │ hapi │
|
||||
│ (Phone/PWA/ │ WireGuard │ (Self-hosted│ WireGuard │ (Desktop) │
|
||||
│ Telegram) │ + TLS │ relay) │ + TLS │ │
|
||||
└─────────────┘ └─────────────┘ └──────┬──────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────┐
|
||||
│Claude Code │
|
||||
│ / Codex / │
|
||||
│ Gemini etc. │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
HAPI uses WireGuard plus TLS for end-to-end encryption. All communication goes through encrypted relay servers. You can self-host relay servers to fully control your data flow.
|
||||
|
||||
### Core features
|
||||
|
||||
- **Seamless switching**: switch control between desktop and phone; press any key to return to local control
|
||||
- **Native-first**: mobile apps are wrapped with native technology for smooth interaction
|
||||
- **AFK approvals**: receive approval requests on your phone while away from your computer
|
||||
- **Multi-model support**: supports Claude Code, Codex, Gemini, OpenCode, and more
|
||||
- **Terminal anywhere**: access via PWA, Telegram Mini App, and more
|
||||
- **Voice control**: supports voice input commands, so your hands stay free
|
||||
|
||||
### Installation and usage
|
||||
|
||||
**Step 1: start relay server**
|
||||
|
||||
```bash
|
||||
# run on your server (or launch directly with npx)
|
||||
npx @twsxtd/hapi hub --relay
|
||||
```
|
||||
|
||||
**Step 2: install CLI on computer**
|
||||
|
||||
```bash
|
||||
# run in your project directory
|
||||
cd ~/my-project
|
||||
npx @twsxtd/hapi
|
||||
|
||||
# or install globally
|
||||
npm install -g @twsxtd/hapi
|
||||
hapi
|
||||
```
|
||||
|
||||
**Step 3: pair devices**
|
||||
|
||||
Follow terminal prompts, open HAPI app on your phone, and scan the QR code to complete pairing.
|
||||
|
||||
**Step 4: access methods**
|
||||
|
||||
| Access Method | Description |
|
||||
|---------|------|
|
||||
| Web PWA | Browser access, supports install-to-home-screen |
|
||||
| Telegram Mini App | Use directly inside Telegram |
|
||||
| Mobile App | Native app experience (if published) |
|
||||
|
||||
### Differences from Happy Coder
|
||||
|
||||
| Feature | Happy Coder | HAPI |
|
||||
|------|-------------|------|
|
||||
| Design philosophy | Cloud-first | Local-first |
|
||||
| Encryption method | WebSocket + E2E | WireGuard + TLS |
|
||||
| Multi-model support | Claude Code, Codex | Claude, Codex, Gemini, OpenCode |
|
||||
| Access methods | iOS/Android/Web | PWA, Telegram, more |
|
||||
| Voice control | No | Yes |
|
||||
| AFK approvals | No | Yes |
|
||||
| Self-hosted relay | Requires manual deployment | Out-of-the-box support |
|
||||
|
||||
### Resource links
|
||||
|
||||
- [GitHub Project](https://github.com/tiann/hapi) - source code
|
||||
- [PWA Docs](https://github.com/tiann/hapi/blob/main/docs/pwa.md) - PWA installation and usage
|
||||
- [How It Works](https://github.com/tiann/hapi/blob/main/docs/how-it-works.md) - technical implementation details
|
||||
- [Voice Assistant](https://github.com/tiann/hapi/blob/main/docs/voice.md) - voice control features
|
||||
- [Why HAPI](https://github.com/tiann/hapi/blob/main/docs/why-hapi.md) - design philosophy
|
||||
- [FAQ](https://github.com/tiann/hapi/blob/main/docs/faq.md) - frequently asked questions
|
||||
|
||||
### Pros and cons
|
||||
|
||||
Pros are local-first design, multi-model support, end-to-end encryption, voice control, and self-hosted relay capability. Cons are that the project is relatively new and the ecosystem is still growing.
|
||||
|
||||
---
|
||||
|
||||
## Approach 4: SSH + Tailscale + Tmux
|
||||
|
||||
This is the best option for professional developers. You remotely connect to your development machine over SSH and keep sessions persistent with Tmux.
|
||||
|
||||
### How it works
|
||||
|
||||
```text
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Phone │ ────────► │ Tailscale │ ◄──────── │ Computer │
|
||||
│ (SSH client)│ VPN P2P │ relay/hole │ VPN P2P │ (dev host) │
|
||||
└─────────────┘ └─────────────┘ └──────┬──────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────┐
|
||||
│ Tmux │
|
||||
│ (session │
|
||||
│ persistence)│
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
Tailscale creates a peer-to-peer VPN so you can access your home computer from any network. Tmux ensures Claude Code keeps running in the background even when SSH disconnects.
|
||||
|
||||
### Why do you need Tailscale?
|
||||
|
||||
**Problems with traditional SSH:**
|
||||
|
||||
```text
|
||||
Phone (4G) ──XX──> Router NAT ──XX──> Home Computer
|
||||
(cannot penetrate) (LAN isolation)
|
||||
```
|
||||
|
||||
Your computer is on a private network, and your phone is on the public network, so direct access fails. Traditional solutions require port forwarding plus dynamic DNS, which are complex and risky.
|
||||
|
||||
**Tailscale solution:**
|
||||
|
||||
```text
|
||||
Phone (4G) ──► Tailscale Relay ──◄── Home Computer
|
||||
(auto hole-punch or relay)
|
||||
```
|
||||
|
||||
Tailscale uses NAT traversal, and falls back to relay automatically if traversal fails. The entire connection is encrypted.
|
||||
|
||||
### Full setup steps
|
||||
|
||||
**Step 1: install Tailscale on computer**
|
||||
|
||||
```bash
|
||||
# macOS
|
||||
brew install --cask tailscale
|
||||
|
||||
# or download installer
|
||||
# https://tailscale.com/download
|
||||
```
|
||||
|
||||
**Step 2: log in and get IP**
|
||||
|
||||
```bash
|
||||
# start Tailscale
|
||||
sudo tailscale up
|
||||
|
||||
# check Tailscale IPv4
|
||||
tailscale ip -4
|
||||
# example output: 100.x.x.x
|
||||
```
|
||||
|
||||
**Step 3: install Tailscale on phone**
|
||||
|
||||
Download Tailscale from App Store or Google Play and log in with the same account.
|
||||
|
||||
**Step 4: install and configure Tmux**
|
||||
|
||||
```bash
|
||||
# macOS
|
||||
brew install tmux
|
||||
|
||||
# create ~/.tmux.conf
|
||||
cat > ~/.tmux.conf << 'EOF'
|
||||
# enable mouse support
|
||||
set -g mouse on
|
||||
|
||||
# default terminal with 256 colors
|
||||
set -g default-terminal "screen-256color"
|
||||
|
||||
# change prefix key to Ctrl+A (optional)
|
||||
unbind C-b
|
||||
set -g prefix C-a
|
||||
|
||||
# simplified split shortcuts
|
||||
bind v split-window -h
|
||||
bind h split-window
|
||||
EOF
|
||||
```
|
||||
|
||||
**Step 5: create a persistent session**
|
||||
|
||||
```bash
|
||||
# create session named "claude"
|
||||
tmux new -s claude
|
||||
|
||||
# start Claude Code in this session
|
||||
cd ~/my-project
|
||||
claude
|
||||
|
||||
# detach without closing
|
||||
# press the tmux prefix, then D (prefix is Ctrl+B by default, or Ctrl+A if you changed it in ~/.tmux.conf)
|
||||
```
|
||||
|
||||
**Step 6: connect from phone SSH client**
|
||||
|
||||
Recommended SSH clients:
|
||||
|
||||
| Client | Platform | Notes |
|
||||
|--------|------|------|
|
||||
| Blink Shell | iOS | Supports MOSH, great for unstable networks |
|
||||
| Termius | iOS/Android | Cross-platform and polished UI |
|
||||
| a-Shell | iOS | Free and lightweight |
|
||||
|
||||
Connection config:
|
||||
|
||||
```text
|
||||
Host: 100.x.x.x (your Tailscale IP)
|
||||
Port: 22
|
||||
Username: your computer username
|
||||
```
|
||||
|
||||
After connecting, attach to Tmux:
|
||||
|
||||
```bash
|
||||
tmux attach -t claude
|
||||
```
|
||||
|
||||
### Advanced tips
|
||||
|
||||
**Prevent your computer from sleeping:**
|
||||
|
||||
```bash
|
||||
# macOS
|
||||
caffeinate -dimsu &
|
||||
|
||||
# or set System Settings > Energy Saver > prevent automatic sleep
|
||||
```
|
||||
|
||||
**Use MOSH for unstable networks:**
|
||||
|
||||
MOSH (Mobile Shell) is an SSH alternative optimized for mobile networks, with seamless recovery across network changes.
|
||||
|
||||
```bash
|
||||
# install on computer
|
||||
brew install mosh
|
||||
|
||||
# use MOSH from phone client
|
||||
# Blink Shell supports MOSH natively
|
||||
```
|
||||
|
||||
**One-command connect script:**
|
||||
|
||||
Set this as startup command in your SSH client:
|
||||
|
||||
```bash
|
||||
tmux attach -t claude || tmux new -s claude
|
||||
```
|
||||
|
||||
This will auto-attach to an existing session or create a new one.
|
||||
|
||||
### Pros and cons
|
||||
|
||||
Pros are full capabilities and desktop-equivalent workflow with all development tools. Cons are more complex setup and the requirement to keep your computer online.
|
||||
|
||||
---
|
||||
|
||||
## Approach 5: Local Termux Runtime
|
||||
|
||||
If you are an Android user, you can run Claude Code directly on your phone without connecting external devices.
|
||||
|
||||
### How it works
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │ Termux │ │
|
||||
│ │ (Linux env) │ │
|
||||
│ │ │ │
|
||||
│ │ • Node.js │ │
|
||||
│ │ • Claude │ │
|
||||
│ │ Code CLI │ │
|
||||
│ │ │ │
|
||||
│ │ • Project │ │
|
||||
│ │ files │ │
|
||||
│ │ • Git │ │
|
||||
│ └─────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────┐ │
|
||||
│ │Anthropic API│ │
|
||||
│ └─────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Termux is a terminal emulator and Linux environment for Android. You can directly install Node.js and Claude Code in it.
|
||||
|
||||
### Installation steps
|
||||
|
||||
**Important**: download Termux from [F-Droid](https://f-droid.org/), not from Google Play (the Play version is outdated).
|
||||
|
||||
**Step 1: install base tools**
|
||||
|
||||
```bash
|
||||
# update package manager
|
||||
pkg update && pkg upgrade
|
||||
|
||||
# install development tools
|
||||
pkg install git nodejs python vim
|
||||
```
|
||||
|
||||
**Step 2: install Claude Code**
|
||||
|
||||
```bash
|
||||
npm install -g @anthropic-ai/claude-code
|
||||
```
|
||||
|
||||
**Step 3: configure environment**
|
||||
|
||||
```bash
|
||||
# create workspace
|
||||
mkdir -p ~/projects
|
||||
cd ~/projects
|
||||
|
||||
# initialize project
|
||||
git clone https://github.com/your-repo.git
|
||||
cd your-repo
|
||||
|
||||
# launch Claude Code
|
||||
claude
|
||||
```
|
||||
|
||||
**Step 4: configure external keyboard (recommended)**
|
||||
|
||||
In Termux:
|
||||
|
||||
```bash
|
||||
# enable extra keys row
|
||||
# long press screen > More > Extra keys row
|
||||
|
||||
# configure shortcuts
|
||||
# add in ~/.termux/termux.properties
|
||||
extra-keys = [['ESC','/','-','HOME','UP','END','PGUP','~'], \
|
||||
['TAB','CTRL','ALT','LEFT','DOWN','RIGHT','PGDN','|']]
|
||||
```
|
||||
|
||||
### Performance considerations
|
||||
|
||||
| Task Type | Android Performance |
|
||||
|---------|-------------|
|
||||
| Web development (HTML/CSS/JS) | Excellent |
|
||||
| Python scripts | Excellent |
|
||||
| Node.js applications | Good |
|
||||
| Running test suites | Medium |
|
||||
| Compiling large projects | Not recommended |
|
||||
|
||||
### Pros and cons
|
||||
|
||||
Pros are full local control, no external host dependency, and local-first operation (network access is still required to reach the Anthropic API). Cons are limited phone performance, a weak text input experience, and Android-only availability.
|
||||
|
||||
---
|
||||
|
||||
## Approach 6: Claude Code UI
|
||||
|
||||
Claude Code UI (also known as CloudCLI) is an open-source project that provides a web interface for Claude Code, with phone browser support.
|
||||
|
||||
### How it works
|
||||
|
||||
```text
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│Phone Browser│ ────────► │ Web Server │ ◄──────── │Claude Code │
|
||||
│ │ HTTP/HTTPS │ (localhost) │ invoke │ CLI │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
You run a web server on your computer, then access it from your phone browser. This requires LAN access or tunneling.
|
||||
|
||||
### Installation and usage
|
||||
|
||||
**Step 1: install**
|
||||
|
||||
```bash
|
||||
# one-command start (recommended)
|
||||
npx @siteboon/claude-code-ui
|
||||
|
||||
# or global install
|
||||
npm install -g @siteboon/claude-code-ui
|
||||
claude-code-ui
|
||||
```
|
||||
|
||||
**Step 2: open interface**
|
||||
|
||||
Server defaults to `http://localhost:3001`.
|
||||
|
||||
**Step 3: access from phone**
|
||||
|
||||
Method A - LAN access (same Wi-Fi):
|
||||
|
||||
```bash
|
||||
# bind all interfaces
|
||||
claude-code-ui --host 0.0.0.0
|
||||
|
||||
# access from phone
|
||||
http://<computer-lan-ip>:3001
|
||||
```
|
||||
|
||||
Method B - ngrok tunnel:
|
||||
|
||||
```bash
|
||||
# install ngrok
|
||||
brew install ngrok
|
||||
|
||||
# start tunnel
|
||||
ngrok http 3001
|
||||
|
||||
# open ngrok URL from phone
|
||||
```
|
||||
|
||||
### Features
|
||||
|
||||
- Responsive design with mobile support
|
||||
- Built-in chat interface
|
||||
- File browser
|
||||
- Git operations UI
|
||||
- Session management
|
||||
|
||||
### Pros and cons
|
||||
|
||||
Pros are graphical interface and rich features. Cons are tunnel requirements outside LAN and relatively more complex setup.
|
||||
|
||||
---
|
||||
|
||||
## Approach 7: Cloud Development Environment
|
||||
|
||||
If you do not have an always-on local computer, you can use cloud development environments where Claude Code runs on cloud servers.
|
||||
|
||||
### How it works
|
||||
|
||||
```text
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Phone │ ────────► │ Cloud Box │ ─────────► │Claude Code │
|
||||
│(Browser/App)│ HTTPS │ (DevBox) │ │ CLI │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
A cloud container comes with Claude Code preinstalled, and you access it from browser or mobile app.
|
||||
|
||||
### Using Sealos DevBox
|
||||
|
||||
**Step 1: create environment**
|
||||
|
||||
Go to [Sealos DevBox](https://sealos.io/devbox), choose a Claude Code template, and create an environment.
|
||||
|
||||
**Step 2: start development environment**
|
||||
|
||||
Environment is ready in about 30-60 seconds, and you get a web terminal.
|
||||
|
||||
**Step 3: configure Claude API**
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_API_KEY="your-api-key"
|
||||
```
|
||||
|
||||
**Step 4: connect Happy app**
|
||||
|
||||
```bash
|
||||
# install happy-coder (or use preinstalled)
|
||||
npm install -g happy-coder
|
||||
|
||||
# generate pairing QR code
|
||||
happy
|
||||
```
|
||||
|
||||
After scanning on your phone, you can use it immediately.
|
||||
|
||||
### Cloud option comparison
|
||||
|
||||
| Platform | Claude Code | Mobile Optimization | Startup Time | Pricing |
|
||||
|------|------------|----------|----------|------|
|
||||
| Sealos DevBox | Preinstalled | Happy support | ~60s | Pay-as-you-go |
|
||||
| GitHub Codespaces | Manual setup | Browser flow | ~2-3 min | Free quota + hourly |
|
||||
| Gitpod | Manual setup | Browser flow | ~1-2 min | Free quota + hourly |
|
||||
| Replit | No native Claude Code | Native app | Instant | Free + subscription |
|
||||
|
||||
### Pros and cons
|
||||
|
||||
Pros are no local computer requirement, environment consistency, and scalability. Cons are paid usage, network dependency, and code hosted in cloud.
|
||||
|
||||
---
|
||||
|
||||
## Comparison and Selection
|
||||
|
||||
Each approach has different strengths and is suitable for different scenarios.
|
||||
|
||||
### Comparison table
|
||||
|
||||
| Approach | Difficulty | Requires Tunnel | Cost | Best Scenarios |
|
||||
|------|------|-------------|------|----------|
|
||||
| Official iOS App | Easy | No | $20/month | Quick checks, simple tasks |
|
||||
| Happy Coder | Relatively easy | No | Free | Daily use, convenience |
|
||||
| HAPI | Medium | No | Free | Multi-model, local-first |
|
||||
| SSH + Tailscale | Relatively complex | No | Free | Professional development, full features |
|
||||
| Termux | Medium | No | Free | Android local development |
|
||||
| Claude Code UI | Medium | Yes | Free | Web interface preference |
|
||||
| Cloud DevBox | Easy | No | Pay-as-you-go | No local computer |
|
||||
|
||||
### Selection guide
|
||||
|
||||
**If you are in mainland China**: use **Happy Coder**; with domestic API relay setup, it works well.
|
||||
|
||||
**If you want maximum convenience**: choose Happy Coder. Scan-and-use flow is very convenient.
|
||||
|
||||
**If you need multi-model support**: choose HAPI. It supports multiple AI coding assistants and is ideal for model switching workflows.
|
||||
|
||||
**If you have an always-on computer**: choose SSH + Tailscale. This gives the most complete experience.
|
||||
|
||||
**If you are an iPhone user (outside mainland China)**: official app is the easiest way to get started.
|
||||
|
||||
**If you only have Android**: Termux gives a fully local mobile development path.
|
||||
|
||||
**If you do not have a computer**: cloud DevBox is the ideal choice.
|
||||
|
||||
---
|
||||
|
||||
## Security and Privacy
|
||||
|
||||
Mobile development involves code transfer over networks, so security needs special attention.
|
||||
|
||||
### Risks of relay servers
|
||||
|
||||
When using relay-dependent services like Happy Coder or HAPI, consider these risks:
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ │
|
||||
│ What can a relay server potentially see? │
|
||||
│ │
|
||||
│ • Data before encryption (if E2E is implemented poorly) │
|
||||
│ • Metadata (when you connect, how long sessions run) │
|
||||
│ • Your API key (if configured incorrectly) │
|
||||
│ │
|
||||
│ What can a relay server potentially do? │
|
||||
│ │
|
||||
│ • Record your code content │
|
||||
│ • Steal API credentials │
|
||||
│ • Inject malicious commands │
|
||||
│ • Abuse your device as an attack node │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Security best practices
|
||||
|
||||
**1. Code sensitivity grading**
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ │
|
||||
│ Public projects/learning code -> any approach is acceptable│
|
||||
│ │
|
||||
│ Private projects -> prefer SSH+Tailscale or self-hosted │
|
||||
│ │
|
||||
│ Commercial code -> use SSH+Tailscale only, disable all │
|
||||
│ third-party relay paths │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**2. Key management**
|
||||
|
||||
```bash
|
||||
# BAD example: do not hard-code keys in source like this
|
||||
const apiKey = "sk-ant-xxxxx"
|
||||
|
||||
# use environment variables
|
||||
const apiKey = process.env.ANTHROPIC_API_KEY
|
||||
|
||||
# use .env files (add to .gitignore)
|
||||
ANTHROPIC_API_KEY=sk-ant-xxxxx
|
||||
```
|
||||
|
||||
**3. Use sandbox mode**
|
||||
|
||||
Claude Code supports sandbox mode to limit access scope:
|
||||
|
||||
```bash
|
||||
claude --sandbox /path/to/project
|
||||
```
|
||||
|
||||
**4. Self-host relay**
|
||||
|
||||
If using Happy Coder, consider self-hosting relay:
|
||||
|
||||
```bash
|
||||
# clone project (includes server implementation)
|
||||
git clone https://github.com/slopus/happy.git
|
||||
cd happy
|
||||
|
||||
# deploy server to your VPS
|
||||
# follow project documentation for details
|
||||
```
|
||||
|
||||
**5. Use Headscale**
|
||||
|
||||
Headscale is an open-source, self-hostable implementation of the Tailscale control server, so device coordination no longer depends on Tailscale's hosted service:
|
||||
|
||||
```bash
|
||||
# one-command Docker deployment
|
||||
docker run -d \
|
||||
--name headscale \
|
||||
-v /srv/headscale:/etc/headscale \
|
||||
-p 3478:3478/udp \
|
||||
-p 8080:8080 \
|
||||
headscale/headscale:latest
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Frequently Asked Questions
|
||||
|
||||
### Do I need NAT traversal?
|
||||
|
||||
Most modern approaches **do not** require manual NAT traversal:
|
||||
|
||||
| Approach | Principle |
|
||||
|------|------|
|
||||
| Happy Coder | Relay mode, both sides actively connect to server |
|
||||
| HAPI | Relay mode, WireGuard + TLS |
|
||||
| Tailscale | NAT hole-punching or relay |
|
||||
| iOS App | Cloud execution |
|
||||
| Claude Code UI | Requires inbound access |
|
||||
|
||||
### Why does relay mode not require traversal?
|
||||
|
||||
```text
|
||||
Outbound connection (NAT allows):
|
||||
Computer ──► Relay Server yes
|
||||
|
||||
Inbound connection (NAT blocks):
|
||||
External ──► Computer no
|
||||
|
||||
Relay trick:
|
||||
Both sides make outbound connections to the relay,
|
||||
so neither side needs inbound connectivity.
|
||||
```
|
||||
|
||||
### Does mobile development affect battery life?
|
||||
|
||||
Different approaches consume different power:
|
||||
|
||||
| Approach | Power Usage | Reason |
|
||||
|------|--------|------|
|
||||
| SSH terminal | Low | Text-only rendering |
|
||||
| iOS App | Medium | Cloud execution, phone controls only |
|
||||
| Termux | High | Local CLI runtime |
|
||||
| Browser | Medium | Web UI rendering load |
|
||||
|
||||
For long sessions, keep your phone charging.
|
||||
|
||||
### What happens when network disconnects?
|
||||
|
||||
| Approach | Impact of Network Disconnect |
|
||||
|------|-------------|
|
||||
| SSH + Tmux | Claude keeps running; recover on reconnect |
|
||||
| Happy Coder | Auto-reconnect |
|
||||
| HAPI | Auto-reconnect |
|
||||
| iOS App | Cloud continues; app shows disconnect |
|
||||
| Termux | Session interruption |
|
||||
|
||||
### Can I compile large projects on a phone?
|
||||
|
||||
Not recommended. Phone CPU and memory are limited, and large builds can cause:
|
||||
|
||||
- significant heating
|
||||
- rapid battery drain
|
||||
- very long compile times
|
||||
|
||||
Run heavy build tasks on remote hosts or cloud environments.
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
The core idea of Claude Code mobile development is: **the phone is the controller, and real development runs elsewhere**.
|
||||
|
||||
Which approach you should choose depends on your specific needs.
|
||||
|
||||
If you are in mainland China, **Happy Coder** is recommended, especially when paired with domestic API relay configuration.
|
||||
|
||||
If you want the most convenient setup, use **Happy Coder**. Scan to connect, get push notifications, and switch devices smoothly.
|
||||
|
||||
If you need multi-model support or local-first architecture, use **HAPI**. It supports multiple assistants and self-hosted relay.
|
||||
|
||||
If you want the most complete development experience, use **SSH + Tailscale**. Setup is more complex, but capability is closest to desktop.
|
||||
|
||||
If you are an iOS user outside mainland China, the **official app** is the easiest way to begin.
|
||||
|
||||
If you are an Android user, **Termux** enables fully local development on the phone.
|
||||
|
||||
If you do not have an always-on computer, **cloud DevBox** is the ideal option.
|
||||
|
||||
No matter which solution you choose, security matters: be cautious with third-party relay for sensitive code, manage API keys properly, and prefer self-hosted or private paths for important projects.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Official Resources
|
||||
|
||||
- [Claude Code Official Docs](https://docs.anthropic.com/en/docs/claude-code) - complete official Claude Code documentation
|
||||
- [Claude iOS App](https://apps.apple.com/app/claude/id6473753684) - official iOS app
|
||||
|
||||
### Open Source Projects
|
||||
|
||||
- [slopus/happy](https://github.com/slopus/happy) (2.5k stars) - Happy Coder mobile client
|
||||
- [tiann/hapi](https://github.com/tiann/hapi) - HAPI local-first multi-model AI coding assistant
|
||||
- [siteboon/claudecodeui](https://github.com/siteboon/claudecodeui) - Claude Code UI (CloudCLI)
|
||||
- [juanfont/headscale](https://github.com/juanfont/headscale) (19k stars) - open-source, self-hosted implementation of the Tailscale control server
|
||||
|
||||
### Chinese Tutorials
|
||||
|
||||
- [Code Anytime Anywhere: Configure Claude Code on Phone](https://m.blog.csdn.net/haa_y/article/details/151156494) - Termux setup guide
|
||||
- [AI Lab in Your Pocket: Always-Online Claude Code Mobile Workflow](https://www.cnblogs.com/swizard/p/19308983) - Tmux + Docker approach
|
||||
- [I Took Claude Code Shopping with My Girlfriend](https://post.m.smzdm.com/p/a3r7d63d/) - Tailscale remote connection
|
||||
- [Build Production Apps from Phone](https://m.toutiao.com/article/7611823834756301318/) - real mobile development case
|
||||
|
||||
### English Resources
|
||||
|
||||
- [The Definitive Guide to Using Claude Code on Your Phone | Sealos Blog](https://sealos.io/blog/claude-code-on-phone/) - most comprehensive mobile guide
|
||||
- [SSH + Tailscale + Termius Complete Guide](https://m.blog.csdn.net/Lvyizhuo/article/details/157692953) - detailed remote connectivity guide
|
||||
|
||||
### Tool Downloads
|
||||
|
||||
- [Tailscale](https://tailscale.com/download) - peer-to-peer VPN tool
|
||||
- [Termux (F-Droid)](https://f-droid.org/en/packages/com.termux/) - Android terminal emulator
|
||||
- [Blink Shell](https://blink.sh/) - iOS SSH client (MOSH support)
|
||||
- [Termius](https://termius.com/) - cross-platform SSH client
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,613 @@
|
||||
# From Vibe Coding to Spec Coding: The Evolution of AI Programming
|
||||
|
||||
> "Code is a lossy projection of intent."
|
||||
> Code is a lossy projection of intent.
|
||||
> — Sean Grove, OpenAI, AI Engineer World's Fair 2025
|
||||
|
||||
## The Core Idea of Spec Coding: Everything Is Markdown
|
||||
|
||||
Before going deeper into Spec Coding, first understand the underlying philosophy of Claude Code: **everything is Markdown**.
|
||||
|
||||
In Claude Code's design philosophy, process records, information transfer, and even conversations with the model can all be Markdown:
|
||||
|
||||
- **CLAUDE.md**: a Markdown document for project conventions
|
||||
- **.claude/rules/**: a collection of layered Markdown rule files
|
||||
- **specs/**: Markdown descriptions of feature requirements
|
||||
- **Conversation history**: Claude Code's chat records are themselves in Markdown format
|
||||
- **AGENTS.md**: Markdown instructions that define agent behavior
|
||||
|
||||
This is exactly the core of Spec Coding: **the specification itself is code**. When you write requirements, design decisions, and acceptance criteria in Markdown, you are already writing "code" - AI will read that Markdown and then generate the real implementation.
|
||||
|
||||
Josh Beckman's summary of Grove's talk captures it perfectly:
|
||||
|
||||
> "Software engineering (and lawmaking and legal review) is specification repair."
|
||||
> Software engineering (and lawmaking and legal review) is specification repair.
|
||||
|
||||
In Claude Code, this "specification repair" process is: **modify Markdown -> AI reads Markdown -> generate/modify code -> verify the result**. The entire workflow is Markdown-driven.
|
||||
|
||||
---
|
||||
|
||||
## Introduction: From "The New Code" to Spec Coding
|
||||
|
||||
In 2025, OpenAI researcher **Sean Grove** gave a talk titled **"The New Code"** at the AI Engineer World's Fair, and it shook the entire developer community. He proposed a disruptive idea: **for 70 years we have been writing code to solve problems, but code is only a lossy projection of intent - specifications are the real "new code."**
|
||||
|
||||
That talk gave rise to a new development paradigm: **Spec Coding** - making specification documents, rather than code, the core artifact of development, and letting AI generate code from the specification.
|
||||
|
||||
Starting from Grove's talk, this article will help you understand the core ideas of Spec Coding, review the limits of Vibe Coding, and show how to apply this methodology in real development with Claude Code.
|
||||
|
||||
::: info 📚 What you will learn
|
||||
|
||||
1. Understand the key ideas in Sean Grove's "The New Code" talk
|
||||
2. Master the core concepts and methodology of Spec Coding
|
||||
3. Recognize both the value and the ceiling of Vibe Coding
|
||||
4. Learn how to practice a Spec Coding workflow in Claude Code
|
||||
5. Master a gradual transition strategy from Vibe Coding to Spec Coding
|
||||
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## 1. Sean Grove's "The New Code": A Talk That Changes How You Think
|
||||
|
||||
In 2025, OpenAI researcher Sean Grove gave a talk titled **"The New Code"** at the AI Engineer World's Fair. This talk is widely seen as the intellectual starting point of the Spec Coding movement.
|
||||
|
||||
Grove previously founded OneGraph, a GraphQL developer tools company later acquired by Netlify, and now works on alignment reasoning at OpenAI - helping turn high-level intent into executable specifications and evaluation standards.
|
||||
|
||||
### 1.1 Core Argument: Code Is a Lossy Projection of Intent
|
||||
|
||||
The core concept of Grove's talk can be summarized in one sentence:
|
||||
|
||||
> **Code is a lossy projection of intent.**
|
||||
> Code is a lossy projection of intent.
|
||||
|
||||
What does that mean? When you have an idea in your head and turn it into code, a huge amount of context gets lost along the way - **why** you chose this approach, **what trade-offs** you considered, and **which constraints** mattered. The final code only preserves "how to do it," while losing "why it should be done this way."
|
||||
|
||||
It is like compressing a book into a tweet - the information density drops sharply, and the original intent is heavily degraded.
|
||||
|
||||
### 1.2 The Essence of Programming Is Communication
|
||||
|
||||
Grove proposed a simple but profound idea:
|
||||
|
||||
> "If you can communicate effectively, you can program."
|
||||
> If you can communicate effectively, you can program.
|
||||
|
||||
He argues that actual coding work only accounts for **10-20%** of development. The remaining 80-90% is **structured communication** around requirements and goals - understanding what users want, aligning with the team on solutions, defining acceptance criteria, and handling edge cases.
|
||||
|
||||
That means the core of programming ability is not mastery of syntax in a particular language, but the ability to **turn vague intent into precise descriptions**.
|
||||
|
||||
### 1.3 Whoever Writes the Spec Is the Programmer
|
||||
|
||||
This is Grove's most disruptive idea:
|
||||
|
||||
> "Whoever writes the spec - be it a PM, a lawmaker, an engineer, a marketer - is now the programmer."
|
||||
> Whoever writes the spec - be it a PM, a lawmaker, an engineer, a marketer - is now the programmer.
|
||||
|
||||
As AI becomes increasingly good at turning specifications into code, the **real programming work** shifts from "writing code" to "writing specifications." Whoever can express intent most precisely becomes the most valuable "programmer."
|
||||
|
||||
### 1.4 Specifications Can Have a Code-Like Toolchain
|
||||
|
||||
Grove pointed out that specifications can have a complete toolchain just like code:
|
||||
|
||||
> "Specs actually give us a very similar toolchain, but it's targeted at intentions rather than syntax."
|
||||
|
||||
- **Composition**: specifications can be modular and composable, like code modules
|
||||
- **Testing**: specifications can embed unit tests to verify that behavior matches expectations
|
||||
- **Linting**: ambiguous language in specifications can be detected, just like a linter catches syntax issues
|
||||
- **Consistency checks**: specifications across departments can be checked for consistency, similar to a type checker
|
||||
|
||||
### 1.5 OpenAI Model Spec: Living Proof
|
||||
|
||||
Grove used OpenAI's own **Model Spec** document as evidence.
|
||||
|
||||
When OpenAI discovered a sycophancy problem, they did not retrain the model. Instead, they **modified the specification document**. The change propagated automatically across the system, and the issue was corrected.
|
||||
|
||||
This proves a crucial point: **the specification itself can act like executable code**. Changing the specification is equivalent to changing behavior, without touching a single line of traditional code.
|
||||
|
||||
Josh Beckman's summary of Grove's talk captures it perfectly:
|
||||
|
||||
> "Software engineering (and lawmaking and legal review) is specification repair."
|
||||
> Software engineering (and lawmaking and legal review) is specification repair.
|
||||
|
||||
---
|
||||
|
||||
## 2. Spec Coding: Specification as Code
|
||||
|
||||
### 2.1 What Is Spec Coding
|
||||
|
||||
Spec Coding, also called Spec-Driven Development (SDD), is a methodology that treats **specification documents as the core artifact of development**.
|
||||
|
||||
The core idea is: **write the specification clearly first, then let AI generate code from that specification. The specification is the source of truth, and code is only the implementation artifact derived from it.**
|
||||
|
||||
Robert C. Martin's classic statement from *Clean Code* becomes newly relevant in the AI era:
|
||||
|
||||
> "Specifying requirements so precisely that a machine can execute them is programming."
|
||||
> Specifying requirements so precisely that a machine can execute them is programming.
|
||||
|
||||
### 2.2 Comparing Vibe Coding and Spec Coding
|
||||
|
||||
| Dimension | Vibe Coding | Spec Coding |
|
||||
|------|------------|-------------|
|
||||
| **Approach** | Improvised prompts, iterative back-and-forth | Write a complete specification first, then generate code |
|
||||
| **Best for** | Prototypes, hackathons, exploration | Production systems, team collaboration, enterprise work |
|
||||
| **Code quality** | Fast but fragile | Structured, testable, auditable |
|
||||
| **First-pass success rate** | Unstable | Targets 95%+ |
|
||||
| **Reusability** | One-off prompts | Specifications can be reused across projects |
|
||||
| **Security** | Easy to overlook things | Built in at the specification layer |
|
||||
| **Documentation** | Missing or always lagging behind | The specification is the documentation and stays maintained |
|
||||
| **Team collaboration** | Depends on personal prompting skill | Shared specifications, shared standards |
|
||||
|
||||
The two are not opposites. As Brad Jolicoeur points out:
|
||||
|
||||
> "Clever engineers will even use vibe coding as a first step to generate the initial draft of a specification."
|
||||
> Clever engineers will even use vibe coding as a first step to generate the initial draft of a specification.
|
||||
|
||||
### 2.3 The Three-Layer Specification Structure of Spec Coding
|
||||
|
||||
Engineers at Red Hat summarized a practical three-layer specification model:
|
||||
|
||||
**Layer 1: Functional Specification (What)**
|
||||
|
||||
Describe the expected result in natural language and answer "what should it do":
|
||||
|
||||
```markdown
|
||||
## User Authentication Feature
|
||||
|
||||
### User Stories
|
||||
- As a new user, I want to register with my email
|
||||
- As a registered user, I want to log in with email and password
|
||||
- As a user who forgot my password, I want to reset it by email
|
||||
|
||||
### Acceptance Criteria
|
||||
- Validate email format and password strength during registration
|
||||
- Lock the account for 15 minutes after 5 failed login attempts
|
||||
- Password reset links are valid for 30 minutes
|
||||
```
|
||||
|
||||
**Layer 2: Language-Agnostic Specification (How - Architecture Layer)**
|
||||
|
||||
Define data structures, architectural patterns, and security requirements:
|
||||
|
||||
```markdown
|
||||
## Technical Design
|
||||
|
||||
### Data Model
|
||||
- users table: id, email, password_hash, created_at, locked_until
|
||||
- sessions table: id, user_id, token, expires_at
|
||||
|
||||
### API Design
|
||||
- POST /api/auth/register -> 201 Created
|
||||
- POST /api/auth/login -> 200 OK + JWT
|
||||
- POST /api/auth/reset-password -> 202 Accepted
|
||||
|
||||
### Security Requirements
|
||||
- Passwords use bcrypt with cost factor >= 12
|
||||
- JWT expires in 15 minutes, refresh token in 7 days
|
||||
- Enable rate limiting on all endpoints
|
||||
```
|
||||
|
||||
**Layer 3: Language-Specific Specification (How - Implementation Layer)**
|
||||
|
||||
Version requirements, test framework, and documentation standards:
|
||||
|
||||
```markdown
|
||||
## Implementation Constraints
|
||||
|
||||
### Tech Stack
|
||||
- Runtime: Node.js 20+
|
||||
- Framework: Express 5
|
||||
- ORM: Prisma
|
||||
- Testing: Vitest
|
||||
|
||||
### Code Conventions
|
||||
- Use TypeScript strict mode
|
||||
- Use a custom AppError class for error handling
|
||||
- All API endpoints require JSDoc comments
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Practicing Spec Coding in Claude Code
|
||||
|
||||
Once you understand the theory, the next question is how to apply it in Claude Code. Claude Code's design philosophy naturally fits Spec Coding - its `CLAUDE.md`, Rules directory, and `/plan` command are all forms of specification-driven development.
|
||||
|
||||
When OpenAI itself builds projects with Codex, it uses a similar pattern: using an `AGENTS.md` file as a specification to guide the AI agent. Their core lesson is this: **when the agent struggles, treat that as a signal - identify what is missing, whether it is tools, guardrails, or documentation, and then add it to the repository**. That aligns perfectly with Spec Coding: specifications are living artifacts and should keep evolving.
|
||||
|
||||
Research from Augment Code supports the same conclusion: **executable specifications stay accurate because AI agents generate code directly from them, creating a forcing function - outdated specifications produce broken implementations**. That means specifications do not rot the way traditional documentation does.
|
||||
|
||||
### 3.1 Step One: Use `CLAUDE.md` to Establish Project Specifications
|
||||
|
||||
`CLAUDE.md` is the "living specification" of your project. Every time Claude Code starts, it reads this file, which is equivalent to giving AI a persistent project handbook.
|
||||
|
||||
In the earlier chapter [Claude Code Quick Start Core Guide](../basics/), we already learned how to create `CLAUDE.md`. In the context of Spec Coding, its role becomes even more important - **it is not just a config file, but the entry point to the project specification**.
|
||||
|
||||
Engineers at LogRocket emphasize that **solid context is crucial for AI agents because it prevents hallucinations and inefficiency**. Without specifications, an AI agent may make large, uncontrolled changes to a project. `CLAUDE.md` is the first line of defense that provides that "solid context."
|
||||
|
||||
```markdown
|
||||
# E-commerce Project Specification
|
||||
|
||||
## Project Positioning
|
||||
A SaaS e-commerce platform for small and medium-sized merchants, supporting multiple stores and multiple payment channels.
|
||||
|
||||
## Architectural Decisions
|
||||
- Frontend-backend separation with an API-first design
|
||||
- Microservice backend architecture, with services communicating through a message queue
|
||||
- Read-write database separation
|
||||
|
||||
## Core Constraints
|
||||
- Store all monetary amounts as integers in cents to avoid floating-point precision issues
|
||||
- The order state machine must strictly follow: pending payment -> paid -> shipped -> completed
|
||||
- Payment-related endpoints must be idempotent
|
||||
```
|
||||
|
||||
Aviator's team summarized the key information that specifications should capture - and that is exactly what your `CLAUDE.md` should cover:
|
||||
|
||||
- input and output formats and data types
|
||||
- business rules and edge cases
|
||||
- system dependencies and constraints
|
||||
- performance and scalability requirements
|
||||
- error handling and security requirements
|
||||
|
||||
### 3.2 Step Two: Use the Rules Directory to Manage Layered Specifications
|
||||
|
||||
As your project grows, a single `CLAUDE.md` will not be enough. At that point, use the `.claude/rules/` directory to organize layered specifications.
|
||||
|
||||
This is exactly the idea that Augment Code calls "executable specifications": **specifications are not static documents, but living instructions consumed directly by AI agents**. When you split rules into the Rules directory, each rule file is loaded only when related files are being edited, which both saves tokens and preserves precision.
|
||||
|
||||
Engineers at Tessl found that breaking requirements into structured documents - with a PRD defining "what and why," and technical specifications defining "how" - helps prevent AI from accumulating confusion in long conversations and significantly improves output consistency.
|
||||
|
||||
```text
|
||||
.claude/rules/
|
||||
├── 00-architecture.md # Architecture rules (global)
|
||||
├── 01-security.md # Security rules (global)
|
||||
├── 10-api-design.md # API design rules
|
||||
├── 11-frontend-patterns.md # Frontend pattern rules
|
||||
├── 12-database.md # Database rules
|
||||
└── 20-testing.md # Testing rules
|
||||
```
|
||||
|
||||
Each rule file can specify its scope through frontmatter:
|
||||
|
||||
```markdown
|
||||
---
|
||||
paths:
|
||||
- "src/api/**/*.ts"
|
||||
- "src/services/**/*.ts"
|
||||
---
|
||||
|
||||
# API Design Rules
|
||||
|
||||
## Route Design
|
||||
- RESTful style, use plural nouns: /api/v1/orders
|
||||
- Nested resources can go at most two levels deep: /api/v1/users/123/orders
|
||||
|
||||
## Response Format
|
||||
- Success: { data, pagination? }
|
||||
- Error: { error: { code, message, details? } }
|
||||
|
||||
## Must Follow
|
||||
- All write operations require authentication
|
||||
- All list endpoints must support pagination
|
||||
- Sensitive operations must write audit logs
|
||||
```
|
||||
|
||||
That way, when Claude Code edits API-related files, it will automatically load this specification and make sure the generated code follows the standard.
|
||||
|
||||
### 3.3 Step Three: Use `/plan` to Implement Specify -> Plan -> Tasks -> Implement
|
||||
|
||||
The standard Spec Coding workflow is a four-stage loop. GitHub Spec Kit standardizes it as Specify -> Plan -> Tasks -> Implement, and Claude Code's `/plan` command naturally supports this flow.
|
||||
|
||||
The SpecThis team emphasized one key principle: **define boundaries before the agent runs - know what should change before any code changes happen**. That is exactly the value of `/plan`.
|
||||
|
||||
**Stage 1: Specify**
|
||||
|
||||
First write clearly what you want to build. Do not rush into code:
|
||||
|
||||
```text
|
||||
/plan
|
||||
I need to implement an order refund feature. The specification is:
|
||||
|
||||
Functional requirements:
|
||||
- Users can request a full refund before shipment
|
||||
- Within 7 days after shipment, users can request a return and refund
|
||||
- Refunds require administrator approval
|
||||
|
||||
Acceptance criteria:
|
||||
- The refund amount cannot exceed the amount actually paid for the order
|
||||
- Refund state machine: requested -> approved -> refunding -> refunded
|
||||
- Inventory is restored after the refund is completed
|
||||
- Log every operation throughout the process
|
||||
```
|
||||
|
||||
**Stage 2: Plan**
|
||||
|
||||
Claude will generate a technical plan based on your specification:
|
||||
|
||||
```text
|
||||
📋 Refund Feature Implementation Plan
|
||||
|
||||
1. Data model design
|
||||
- Create a refunds table
|
||||
- Add refund-related states to the order state machine
|
||||
|
||||
2. API design
|
||||
- POST /api/orders/:id/refund - request a refund
|
||||
- PUT /api/refunds/:id/approve - approve a refund
|
||||
- GET /api/refunds - refund list
|
||||
|
||||
3. Business logic
|
||||
- Refund eligibility checks
|
||||
- Refund amount calculation
|
||||
- Inventory restoration logic
|
||||
|
||||
4. Integrations
|
||||
- Connect to the payment provider's refund API
|
||||
- Send refund notifications
|
||||
```
|
||||
|
||||
**Stage 3: Tasks**
|
||||
|
||||
Break the plan into small tasks that can be executed independently, and give each task a clear completion standard.
|
||||
|
||||
**Stage 4: Implement**
|
||||
|
||||
Implement one task at a time, validating after each one is completed.
|
||||
|
||||
### 3.4 Real Example: Building a User Notification System with Spec Coding
|
||||
|
||||
Let's use a full example to compare Vibe Coding and Spec Coding. Data from Orchestrator.dev shows that in the 2025 Stack Overflow survey, 84% of developers use or plan to use AI tools, but only 22% are satisfied with the results, and 46% believe accuracy is a problem. Spec Coding is exactly the key to closing that satisfaction gap.
|
||||
|
||||
**Vibe Coding approach:**
|
||||
|
||||
```text
|
||||
You: Build a notification feature
|
||||
AI: [Immediately starts writing code and generates a simple notification list]
|
||||
|
||||
You: It should support read and unread
|
||||
AI: [Modifies the code and adds a read field]
|
||||
|
||||
You: It also needs multiple notification types
|
||||
AI: [Changes it again and adds a type field]
|
||||
|
||||
You: It should push notifications to phones too
|
||||
AI: [Makes a big rewrite, and the previous structure no longer fits very well...]
|
||||
```
|
||||
|
||||
Result: after four rounds of changes, the architecture has been overturned again and again, and the code gets messier over time.
|
||||
|
||||
**Spec Coding approach:**
|
||||
|
||||
First write a specification document `specs/notification.md`:
|
||||
|
||||
```markdown
|
||||
# User Notification System Specification
|
||||
|
||||
## Functional Requirements
|
||||
1. Support three channels: in-app notifications, email notifications, and push notifications
|
||||
2. Notification types: system announcements, order status, promotional campaigns, security alerts
|
||||
3. Users can configure notification preferences by channel and type
|
||||
4. Support read/unread state and bulk mark-as-read
|
||||
|
||||
## Data Model
|
||||
- notifications table: id, user_id, type, channel, title, content,
|
||||
is_read, created_at
|
||||
- notification_preferences table: user_id, type, channel, enabled
|
||||
|
||||
## API Design
|
||||
- GET /api/notifications?type=&is_read= - get notification list (paginated)
|
||||
- PUT /api/notifications/:id/read - mark as read
|
||||
- PUT /api/notifications/read-all - mark all as read
|
||||
- GET /api/notification-preferences - get preference settings
|
||||
- PUT /api/notification-preferences - update preference settings
|
||||
|
||||
## Acceptance Criteria
|
||||
- The unread notification count updates in real time
|
||||
- The notification list supports infinite scrolling
|
||||
- Push notification latency < 3 seconds
|
||||
- Preference changes take effect immediately
|
||||
```
|
||||
|
||||
Then in Claude Code:
|
||||
|
||||
```text
|
||||
@specs/notification.md
|
||||
Implement the user notification system according to this specification.
|
||||
Start with the data model, then implement the API, and finally build the frontend components.
|
||||
Pause after each module is complete, and I will confirm before you continue.
|
||||
```
|
||||
|
||||
Result: it lands cleanly in one go, with a clear architecture and no need to repeatedly tear things down and rebuild them.
|
||||
|
||||
### 3.5 Strengthening Spec Coding with Superpowers
|
||||
|
||||
In the earlier chapter [Superpowers for Engineering-Grade Development](../superpowers/), we learned about the Superpowers skill system. Spec Coding and Superpowers are natural companions:
|
||||
|
||||
| Spec Coding Stage | Matching Superpowers Skill |
|
||||
|------------------|---------------------|
|
||||
| Define the specification | `brainstorming` - use Socratic questioning to clarify requirements |
|
||||
| Technical planning | `writing-plans` - break the specification into small tasks |
|
||||
| Incremental implementation | `test-driven-development` - TDD red-green-refactor |
|
||||
| Quality verification | `code-review` + `verification-before-completion` |
|
||||
|
||||
**Example of combined usage:**
|
||||
|
||||
```text
|
||||
@specs/notification.md
|
||||
Implement the notification system according to this specification using TDD,
|
||||
and help me review the code after it is done
|
||||
```
|
||||
|
||||
This single instruction activates both the Spec Coding workflow and Superpowers skills like TDD and Code Review, forming a complete engineering-grade development process.
|
||||
|
||||
### 3.6 Version Control and Continuous Evolution of Specifications
|
||||
|
||||
The Vibe Coding Substack proposed an important viewpoint: **Specs are now code**. If specifications are code, then they should be managed like code:
|
||||
|
||||
- **Version control**: keep specification files in Git and commit them together with the code
|
||||
- **Change tracking**: every change to the specification has a commit record so you know who changed what and why
|
||||
- **Code review**: changes to specifications should also go through PR review so the team stays aligned
|
||||
- **CI integration**: specification changes trigger automated tests to verify whether the implementation still conforms to the specification
|
||||
|
||||
In Claude Code, that means your `CLAUDE.md`, `.claude/rules/`, and `specs/` directory should all be version-controlled. Robomotion's experience is that **versioning specifications together with implementations prevents drift and keeps everything auditable**.
|
||||
|
||||
OpenAI's Harness Engineering practice also confirms this: their `AGENTS.md` file is itself written by Codex and is continuously updated as the project evolves. When the agent encounters difficulties, the fix is not to change the code directly, but to **have Codex update the specification itself** - forming a self-healing loop for specifications.
|
||||
|
||||
---
|
||||
|
||||
## 4. A Hybrid Strategy: Gradually Moving from Vibe to Spec
|
||||
|
||||
The industry consensus is not "abandon Vibe Coding," but rather **choose the right approach for the right scenario**.
|
||||
|
||||
### 4.1 When to Use Vibe Coding
|
||||
|
||||
- Validate whether an idea is feasible, with a prototype built within 30 minutes
|
||||
- Explore unfamiliar technologies or frameworks
|
||||
- Hackathons or internal demos
|
||||
- One-off scripts or tools
|
||||
|
||||
### 4.2 When to Use Spec Coding
|
||||
|
||||
- Production feature development
|
||||
- Multi-person collaborative projects
|
||||
- Code that will need long-term maintenance
|
||||
- Sensitive domains such as security, payments, or data
|
||||
- API design and system integration
|
||||
|
||||
### 4.3 A Recommended Gradual Workflow
|
||||
|
||||
**Stage 1: Vibe Exploration**
|
||||
|
||||
Use Vibe Coding to validate the idea quickly. Do not write specifications yet, and do not worry about code quality:
|
||||
|
||||
```text
|
||||
Build a simple notification popup so we can see how it feels
|
||||
```
|
||||
|
||||
**Stage 2: Refine the Specification**
|
||||
|
||||
Once feasibility is confirmed, organize what you learned during exploration into a specification. You can even ask AI to help:
|
||||
|
||||
```text
|
||||
Based on the notification feature prototype we just built,
|
||||
help me organize a formal functional specification document,
|
||||
including the data model, API design, and acceptance criteria
|
||||
```
|
||||
|
||||
**Stage 3: Rebuild with Spec**
|
||||
|
||||
Based on that specification, re-implement the production-grade version using Spec Coding:
|
||||
|
||||
```text
|
||||
@specs/notification.md
|
||||
Implement this from scratch according to the specification, and do not refer to the previous prototype code
|
||||
```
|
||||
|
||||
The advantage of this workflow is clear: **use the speed of Vibe Coding to validate direction, and the quality of Spec Coding to deliver the product**.
|
||||
|
||||
Robomotion summarized it well:
|
||||
|
||||
> "The spec is the source of truth. The AI generated output is the draft implementation. Validation is not optional."
|
||||
> The spec is the source of truth. The AI generated output is the draft implementation. Validation is not optional.
|
||||
|
||||
---
|
||||
|
||||
## 5. Frequently Asked Questions
|
||||
|
||||
### Q1: Doesn't Spec Coding feel too slow?
|
||||
|
||||
Writing specifications does require up-front investment. But Greg Ceccarelli's team used Spec Coding to deliver a complete macOS product with **three people in four weeks** - something that would be nearly impossible in traditional development.
|
||||
|
||||
The time spent writing specifications early will be recovered later through less rework, fewer bugs, and lower communication cost.
|
||||
|
||||
### Q2: How detailed should a specification be?
|
||||
|
||||
Robomotion's suggestion is: **a high-quality specification can be as short as one page**. What matters is whether it answers these eight questions:
|
||||
|
||||
1. What are we automating?
|
||||
2. What is the input?
|
||||
3. What is the output?
|
||||
4. What are the constraints?
|
||||
5. What are the failure modes?
|
||||
6. What are the security requirements?
|
||||
7. What are the performance requirements?
|
||||
8. What tests prove that it works?
|
||||
|
||||
### Q3: What if AI only does exactly what the specification says and misses "obvious" features?
|
||||
|
||||
This is a genuine limitation of Spec Coding. Feedback from GitHub Spec Kit users is that AI will do **"exactly and only"** what is written in the specification.
|
||||
|
||||
The solution is to add a "non-functional requirements" section to the specification and list common expectations there, such as error handling, logging, and accessibility. Alternatively, set global rules in `CLAUDE.md`.
|
||||
|
||||
### Q4: Do small projects also need Spec Coding?
|
||||
|
||||
No. Spec Coding is best suited to:
|
||||
|
||||
- production-grade projects
|
||||
- collaborative team projects
|
||||
- projects that need long-term maintenance
|
||||
|
||||
For quick prototypes, one-off scripts, and learning experiments, Vibe Coding is more suitable.
|
||||
|
||||
### Q5: How do you get a team to accept Spec Coding?
|
||||
|
||||
Start with a small feature as a pilot. Let the team see how Spec Coding reduces rework and improves first-pass success. The Stack Overflow 2025 survey shows that 84% of developers use or plan to use AI tools, but only 22% are satisfied with the results - Spec Coding is exactly the key to improving that satisfaction.
|
||||
|
||||
---
|
||||
|
||||
## 6. Summary
|
||||
|
||||
Moving from Vibe Coding to Spec Coding is not a revolution. It is an evolution.
|
||||
|
||||
Sean Grove made it very clear in "The New Code": **for 70 years, we have been writing code to solve problems; now we should be writing specifications to generate code**. Code is a lossy projection of intent, while specifications can fully capture intent, context, and constraints.
|
||||
|
||||
For developers using Claude Code, this shift is already happening:
|
||||
|
||||
- the `CLAUDE.md` you write is your project specification
|
||||
- the Rules directory you configure is your layered specification system
|
||||
- the planning you do with `/plan` is the Specify -> Plan -> Tasks flow
|
||||
- combining TDD and Code Review from Superpowers gives you a complete Spec Coding workflow
|
||||
|
||||
**Key takeaways:**
|
||||
|
||||
- Vibe Coding is suitable for exploration and prototypes, while Spec Coding is suitable for production and collaboration
|
||||
- The specification is the source of truth, and code is an implementation artifact produced from it
|
||||
- The ability to write specifications is programming ability, and communication skill matters more than mastery of syntax
|
||||
- Start small: just by writing `CLAUDE.md` well, you have already taken the first step into Spec Coding
|
||||
|
||||
::: tip 💡 Next step
|
||||
In the next chapter, we will learn how to use Claude Code's Agent Teams capability so multiple AI instances can collaborate like a real development team.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Related to Sean Grove's "The New Code" Talk
|
||||
|
||||
- [Code is just a lossy projection of intent — The Decoder](https://the-decoder.com/code-is-just-a-lossy-projection-of-intent-according-to-openai-researcher-sean-grove/)
|
||||
- [The End of Coding? How Specifications Are Becoming the New Source Code — Implicator](https://www.implicator.ai/the-end-of-coding-how-specifications-are-becoming-the-new-source-code/)
|
||||
- [OpenAI: Intent, Not Code, Drives Future Software Development — AI Tech Suite](https://www.aitechsuite.com/ai-news/openai-intent-not-code-drives-future-software-development)
|
||||
- [Note on The New Code — Josh Beckman](https://www.joshbeckman.org/notes/914234100)
|
||||
- [Full Transcript of "The New Code"](https://lawwu.github.io/transcripts/8rABwKRsec4.html)
|
||||
|
||||
### Spec Coding Methodology
|
||||
|
||||
- [How spec-driven development improves AI coding quality — Red Hat](https://developers.redhat.com/articles/2025/10/22/how-spec-driven-development-improves-ai-coding-quality)
|
||||
- [Spec-Driven Development with AI: Complete 2025 Guide — Dplooy](https://www.dplooy.com/blog/spec-driven-development-with-ai-complete-2025-guide)
|
||||
- [Spec-Driven Development: Building Production-Ready Software with AI — Orchestrator.dev](https://orchestrator.dev/blog/2025-12-16-spec_driven_dev_article)
|
||||
- [Agents Code but the Problem of Clear Specification Remains — Greg Ceccarelli](https://www.gregceccarelli.com/writing/beyond-code-centric)
|
||||
|
||||
### Vibe Coding vs. Spec Coding
|
||||
|
||||
- [Vibe Coding vs Spec Driven — Cosmo Edge](https://cosmo-edge.com/vibe-coding-vs-spec-driven-ai-development/)
|
||||
- [Master AI in Software Engineering: Vibe vs. Spec Coding — Brad Jolicoeur](https://bradjolicoeur.com/article/ai-software-engineering-vibe-spec-prompting)
|
||||
- [From Vibe Coding to Spec-Driven Development — Tessl](https://tessl.io/blog/from-vibe-coding-to-spec-driven-development/)
|
||||
- [Spec First Approach for Enterprise — Robomotion](https://robomotion.io/blog/spec-first-approach-the-way-to-adapt-vibe-coding-for-enterprise-work)
|
||||
|
||||
### Tools and Practices
|
||||
|
||||
- [GitHub Spec Kit vs Vibe Coding — Ossels](https://ossels.ai/github-spec-kit-spec-driven-development/)
|
||||
- [A Spec-First Workflow for Agentic AI — LogRocket](https://blog.logrocket.com/spec-first-workflow-agentic-ai/)
|
||||
- [Specs Are Now Code — The Vibe Coding Substack](https://thevibecoding.substack.com/p/specs-are-now-code)
|
||||
- [Harness Engineering — Martin Fowler](https://martinfowler.com/articles/exploring-gen-ai/harness-engineering.html)
|
||||
- [Spec-Driven Development & AI Agents Explained — Augment Code](https://www.augmentcode.com/guides/spec-driven-development-ai-agents-explained)
|
||||
- [Spec-Driven Development: The Key to Scalable AI Agents — Aviator](https://www.aviator.co/blog/spec-driven-development/)
|
||||
@@ -0,0 +1,633 @@
|
||||
# Claude Code Superpowers for Engineering-Grade Development
|
||||
|
||||
## Introduction to Superpowers
|
||||
|
||||
**Superpowers** is an open-source agent skills framework created by Jesse Vincent (online handle: obra), specifically designed to solve a core problem in AI programming: how to make AI produce "engineering-grade" code instead of "toy-grade" code.
|
||||
|
||||
Imagine a normal AI coding assistant as a "smart intern." It can write runnable code, but it may have no tests, no documentation, and no best-practice discipline. Superpowers is like assigning a "senior engineer mentor" to that intern, forcing it to follow a complete software development process.
|
||||
|
||||
### Why Do We Need Superpowers?
|
||||
|
||||
Before Superpowers, there were several issues when using Claude Code:
|
||||
|
||||
- **Chaos in vibe coding**: AI starts coding directly without planning, causing frequent rework
|
||||
- **Lack of TDD discipline**: AI tends to write code first and add tests later, or skip tests entirely
|
||||
- **Coding with vague requirements**: user says "build a login feature," AI starts immediately, and the result is not what was wanted
|
||||
- **Unstable code quality**: no code-review mechanism, so quality depends on AI "mood"
|
||||
|
||||
Superpowers solves these issues and turns Claude into a "disciplined development team." It helps clarify requirements first, then creates a plan, then develops with TDD, and finally ensures quality through code review.
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Step 1: Install Superpowers
|
||||
|
||||
Run in Claude Code:
|
||||
|
||||
```bash
|
||||
# Add marketplace
|
||||
/plugin marketplace add obra/superpowers-marketplace
|
||||
|
||||
# Install superpowers
|
||||
/plugin install superpowers@superpowers-marketplace
|
||||
```
|
||||
|
||||
Or clone manually:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/obra/superpowers.git ~/.claude/skills/superpowers
|
||||
```
|
||||
|
||||
### Step 2: Try Your First Skill
|
||||
|
||||
Let's use Superpowers' **brainstorming** skill to experience its value.
|
||||
|
||||
In Claude Code, type:
|
||||
|
||||
```text
|
||||
Build me a user login feature
|
||||
```
|
||||
|
||||
**Before Superpowers**: Claude starts writing code directly and may produce something you do not really want.
|
||||
|
||||
**With Superpowers**: Claude uses Socratic questions to help clarify requirements:
|
||||
|
||||
> Is this login feature for a Web app or a mobile app?
|
||||
>
|
||||
> Which login methods are required? Email/password? Third-party login (Google, GitHub)?
|
||||
>
|
||||
> Do you need a "remember me" feature?
|
||||
>
|
||||
> Should password reset be via email or SMS?
|
||||
>
|
||||
> ...
|
||||
|
||||
These questions force you to clarify what you actually need before coding, preventing a lot of unused code.
|
||||
|
||||
### Step 3: Understand Skill Trigger Mechanisms
|
||||
|
||||
Superpowers is not a "magic switch." It is a **set of skills**. Understanding how skills are triggered is important.
|
||||
|
||||
**Three trigger methods:**
|
||||
|
||||
1. **Keyword trigger**
|
||||
- When you mention "TDD," "test-driven development," or "write tests first"
|
||||
- The `test-driven-development` skill is activated
|
||||
|
||||
2. **Scenario trigger**
|
||||
- When requirements are unclear, `brainstorming` asks proactive questions
|
||||
- When bugs appear, `systematic-debugging` is activated
|
||||
|
||||
3. **Manual invocation**
|
||||
- Use skill names directly, such as: `/test-driven-development`
|
||||
|
||||
#### 💡 Important Clarification: What Happens If You Do Not Specify TDD?
|
||||
|
||||
This is a common misunderstanding. Let's clarify:
|
||||
|
||||
```text
|
||||
# Case A: TDD not mentioned
|
||||
"Implement a calculator"
|
||||
-> Claude may write tests, or may not
|
||||
-> Depends on the model's own habits
|
||||
|
||||
# Case B: TDD explicitly requested
|
||||
"Implement a calculator with TDD"
|
||||
-> test-driven-development skill is activated
|
||||
-> RED-GREEN-REFACTOR is enforced
|
||||
```
|
||||
|
||||
**The real value of Superpowers**: not creating abilities from nothing, but strengthening discipline.
|
||||
|
||||
- Without the TDD skill: Claude writing tests is "maybe"
|
||||
- With the TDD skill: Claude is forced to follow TDD flow
|
||||
|
||||
### Understanding the Value of Superpowers
|
||||
|
||||
From the explanation above, the core value of Superpowers is clear:
|
||||
|
||||
1. **Requirements first**: `brainstorming` asks actively when requirements are vague
|
||||
2. **Process discipline**: `test-driven-development` enforces the TDD red-green-refactor cycle
|
||||
3. **Task decomposition**: `writing-plans` breaks large projects into small tasks
|
||||
4. **Quality control**: the code-review skills (`requesting-code-review` and `receiving-code-review`) ensure code quality
|
||||
|
||||
---
|
||||
|
||||
## Superpowers Core Skills in Detail
|
||||
|
||||
Superpowers includes **20+ composable skills** covering the full software lifecycle. Let's go through them by category.
|
||||
|
||||
### 🧪 Testing Skills
|
||||
|
||||
#### test-driven-development
|
||||
|
||||
**How to trigger**: mention keywords like "TDD," "test-driven development," or "write tests first."
|
||||
|
||||
**What this skill does**: forces Claude to follow the TDD red-green-refactor cycle instead of "maybe writing tests later."
|
||||
|
||||
**Traditional approach** (common problems):
|
||||
1. Write code directly
|
||||
2. Do a quick manual test
|
||||
3. Find bugs and patch code
|
||||
4. Repeat... (tests? maybe next time)
|
||||
|
||||
**TDD approach** (after skill activation):
|
||||
1. 🔴 **RED**: write a failing test first
|
||||
2. 🟢 **GREEN**: write the minimal code to pass the test
|
||||
3. 🔵 **REFACTOR**: refactor while keeping tests passing
|
||||
4. Repeat
|
||||
|
||||
**Example usage**:
|
||||
|
||||
```text
|
||||
Implement a user authentication module using TDD
|
||||
```
|
||||
|
||||
Claude will:
|
||||
1. Write tests first (username/password validation, token generation, etc.)
|
||||
2. Run tests and confirm all fail (RED)
|
||||
3. Write minimal implementation code
|
||||
4. Run tests and confirm pass (GREEN)
|
||||
5. Refactor code and extract shared logic
|
||||
6. Run tests again and confirm pass (REFACTOR)
|
||||
|
||||
> **Note**: if you do not mention "TDD," Claude may or may not write tests. The skill's role is to **enforce process discipline** so tests are not "forgotten."
|
||||
|
||||
### 🐛 Debugging Skills
|
||||
|
||||
#### systematic-debugging
|
||||
|
||||
When bugs appear, human developers often try random fixes. Superpowers enforces four-stage root-cause analysis:
|
||||
|
||||
**Stage 1: Reproduce the issue**
|
||||
- Confirm the bug can be reproduced consistently
|
||||
- Record exact reproduction steps
|
||||
|
||||
**Stage 2: Isolate root cause**
|
||||
- Narrow the scope using binary search style reduction
|
||||
- Add logs to locate problematic code
|
||||
|
||||
**Stage 3: Validate hypotheses**
|
||||
- Propose root-cause hypotheses
|
||||
- Design validation experiments
|
||||
|
||||
**Stage 4: Fix and verify**
|
||||
- Implement the fix
|
||||
- Confirm the bug is resolved
|
||||
- Add regression tests
|
||||
|
||||
#### verification-before-completion
|
||||
|
||||
This skill prevents Claude from stopping when things only "seem close enough." Before claiming completion, it requires Claude to:
|
||||
|
||||
1. Run all tests
|
||||
2. Manually test key features
|
||||
3. Check code quality (lint)
|
||||
4. Confirm documentation is updated
|
||||
|
||||
### 🤝 Collaboration Skills
|
||||
|
||||
#### brainstorming
|
||||
|
||||
This is one of the most interesting Superpowers skills. It uses Socratic questioning to clarify requirements.
|
||||
|
||||
**How it works**: when you provide a vague requirement, Claude does not start coding directly. It asks questions:
|
||||
|
||||
```text
|
||||
You: Build a blog system
|
||||
|
||||
Claude:
|
||||
- Who is this blog for? Technical readers or general readers?
|
||||
- Should Markdown editing be supported?
|
||||
- Do you need comments?
|
||||
- Do you need search?
|
||||
- Single-user or multi-author?
|
||||
- ...
|
||||
```
|
||||
|
||||
These questions force you to think about what functionality is truly needed, avoiding lots of unused features.
|
||||
|
||||
#### writing-plans
|
||||
|
||||
This skill decomposes large tasks into small tasks that can each be completed in 2-5 minutes.
|
||||
|
||||
**Example**:
|
||||
|
||||
```text
|
||||
Use writing-plans to plan development of a todo API
|
||||
```
|
||||
|
||||
Claude will generate a detailed plan:
|
||||
|
||||
```markdown
|
||||
# Implementation Plan
|
||||
|
||||
## Task 1: Design database schema (estimated 5 minutes)
|
||||
- Create todos table
|
||||
- Define fields: id, title, completed, createdAt
|
||||
|
||||
## Task 2: Create Express routes (estimated 10 minutes)
|
||||
- POST /todos - create task
|
||||
- GET /todos - list tasks
|
||||
- GET /todos/:id - get one task
|
||||
- PUT /todos/:id - update
|
||||
- DELETE /todos/:id - delete
|
||||
|
||||
## Task 3: Add input validation (estimated 10 minutes)
|
||||
- title cannot be empty
|
||||
- completed must be boolean
|
||||
|
||||
## Task 4: Write tests (estimated 15 minutes)
|
||||
- Write tests for each endpoint
|
||||
- Cover edge cases
|
||||
|
||||
## Task 5: Start server and verify (estimated 5 minutes)
|
||||
- Run tests
|
||||
- Manually test API
|
||||
|
||||
Acceptance criteria:
|
||||
- All tests pass
|
||||
- curl test passes for every endpoint
|
||||
```
|
||||
|
||||
#### executing-plans
|
||||
|
||||
This skill executes a plan in batches and pauses at each checkpoint for confirmation.
|
||||
|
||||
**Usage example**:
|
||||
|
||||
```text
|
||||
Execute the plan above, and pause after each completed task
|
||||
```
|
||||
|
||||
Claude will:
|
||||
1. Finish task 1, then pause: `✅ Database schema done. Continue?`
|
||||
2. After your confirmation, finish task 2 and pause again
|
||||
3. And so on
|
||||
|
||||
This lets you verify direction at every stage, avoiding late discovery that things drifted off track.
|
||||
|
||||
#### dispatching-parallel-agents
|
||||
|
||||
This skill can launch multiple sub-agents in parallel.
|
||||
|
||||
**Use case**: when you need to process multiple independent tasks simultaneously.
|
||||
|
||||
```text
|
||||
Use parallel agents to complete:
|
||||
- Agent A: write backend APIs
|
||||
- Agent B: write frontend components
|
||||
- Agent C: write tests
|
||||
```
|
||||
|
||||
Each agent works in its own isolated environment without interference.
|
||||
|
||||
#### subagent-driven-development
|
||||
|
||||
This skill launches an independent sub-agent for each small task.
|
||||
|
||||
**Advantages**:
|
||||
- Each sub-agent has an isolated context
|
||||
- Failure of one task does not affect others
|
||||
- Multiple tasks can run in parallel
|
||||
|
||||
#### using-git-worktrees
|
||||
|
||||
This skill uses Git worktree to create isolated development environments.
|
||||
|
||||
**Benefits**:
|
||||
- Multiple features can be developed in parallel
|
||||
- Each worktree is independent
|
||||
- No mutual conflicts
|
||||
|
||||
### 👀 Code Review Skills
|
||||
|
||||
#### requesting-code-review
|
||||
|
||||
After code is completed, this skill automatically requests code review.
|
||||
|
||||
```text
|
||||
Automatically trigger code review after finishing the feature
|
||||
```
|
||||
|
||||
#### receiving-code-review
|
||||
|
||||
This skill defines how to receive and process review feedback.
|
||||
|
||||
**Review workflow**:
|
||||
1. Submit code
|
||||
2. Trigger review automatically
|
||||
3. Reviewer checks code quality, security, and test coverage
|
||||
4. Reviewer provides improvement suggestions
|
||||
5. Fix issues
|
||||
6. Re-review until approval
|
||||
|
||||
---
|
||||
|
||||
## Full Superpowers Workflow
|
||||
|
||||
The real power of Superpowers comes from combining multiple skills into a full development workflow.
|
||||
|
||||
### Standard Development Workflow
|
||||
|
||||
```text
|
||||
1. Brainstorming
|
||||
↓ Clarify real requirements through Q&A
|
||||
|
||||
2. Design Document
|
||||
↓ Present design in chunks and wait for confirmation
|
||||
|
||||
3. Writing Plans
|
||||
↓ Break down into 2-5 minute tasks
|
||||
|
||||
4. Subagent Development
|
||||
↓ Launch isolated sub-agent per task
|
||||
↓ Two-stage review: spec conformance + code quality
|
||||
|
||||
5. TDD
|
||||
↓ Enforce RED-GREEN-REFACTOR
|
||||
|
||||
6. Code Review
|
||||
↓ Quality gate
|
||||
|
||||
7. Done
|
||||
```
|
||||
|
||||
### Practical Example: Build a User Authentication System with Superpowers
|
||||
|
||||
Let's experience the Superpowers workflow through a complete example.
|
||||
|
||||
#### Step 1: Clarify Requirements with Brainstorming
|
||||
|
||||
```text
|
||||
I need a user authentication system
|
||||
```
|
||||
|
||||
Claude will ask:
|
||||
- Is it a Web app or mobile app?
|
||||
- Which login methods are required?
|
||||
- Is password reset needed?
|
||||
- Is "remember me" needed?
|
||||
- ...
|
||||
|
||||
#### Step 2: Generate a Design Document
|
||||
|
||||
After requirements are confirmed, Claude will produce a design document:
|
||||
|
||||
```markdown
|
||||
# User Authentication System Design
|
||||
|
||||
## Functional Requirements
|
||||
1. User registration (email + password)
|
||||
2. User login
|
||||
3. Password reset (email)
|
||||
4. Remember me (JWT + Refresh Token)
|
||||
|
||||
## Technology Choices
|
||||
- Backend: Node.js + Express
|
||||
- Database: SQLite
|
||||
- Auth: JWT
|
||||
- Encryption: bcrypt
|
||||
|
||||
## API Design
|
||||
- POST /api/auth/register
|
||||
- POST /api/auth/login
|
||||
- POST /api/auth/refresh
|
||||
- POST /api/auth/reset-password
|
||||
```
|
||||
|
||||
#### Step 3: Write the Implementation Plan
|
||||
|
||||
```text
|
||||
Use writing-plans to create the implementation plan
|
||||
```
|
||||
|
||||
Claude will generate a detailed task list, each task completable in 2-5 minutes.
|
||||
|
||||
#### Step 4: Execute Development
|
||||
|
||||
```text
|
||||
Execute the plan above with TDD
|
||||
```
|
||||
|
||||
Claude will:
|
||||
1. Write tests first
|
||||
2. Confirm tests fail (RED)
|
||||
3. Write implementation code
|
||||
4. Confirm tests pass (GREEN)
|
||||
5. Refactor code (REFACTOR)
|
||||
|
||||
#### Step 5: Code Review
|
||||
|
||||
After completion, code review is triggered automatically to check:
|
||||
- code quality
|
||||
- security (SQL injection, XSS, etc.)
|
||||
- test coverage
|
||||
- documentation completeness
|
||||
|
||||
---
|
||||
|
||||
## Superpowers vs Direct Claude Code Use
|
||||
|
||||
| Dimension | Direct Claude Code Use | Using Superpowers |
|
||||
|------|---------------------|-----------------|
|
||||
| **Requirement clarification** | AI starts coding directly | Socratic questions clarify requirements first |
|
||||
| **Development process** | Free-form depending on AI | TDD red-green-refactor enforced |
|
||||
| **Task management** | One-shot completion | Broken into small tasks with checkpoints |
|
||||
| **Code quality** | Depends on AI judgment | Code review enforced |
|
||||
| **Predictability** | Unstable outcomes | Repeatable process |
|
||||
| **Best for** | Simple tasks, prototype validation | Complex projects, production code |
|
||||
|
||||
### Visual Metaphor
|
||||
|
||||
If Claude Code is a "smart intern":
|
||||
|
||||
- **Direct use**: tell the intern "build a login feature," and they start coding right away, possibly producing something you find off-target
|
||||
- **With Superpowers**: assign the intern a senior mentor who clarifies requirements, creates plans, and checks code quality
|
||||
|
||||
---
|
||||
|
||||
## Installation and Configuration in Detail
|
||||
|
||||
### Method 1: Via Marketplace (Recommended)
|
||||
|
||||
```bash
|
||||
# Add marketplace
|
||||
/plugin marketplace add obra/superpowers-marketplace
|
||||
|
||||
# Install
|
||||
/plugin install superpowers@superpowers-marketplace
|
||||
|
||||
# Verify installation
|
||||
/skills
|
||||
```
|
||||
|
||||
### Method 2: Manual Clone
|
||||
|
||||
```bash
|
||||
# Create directory
|
||||
mkdir -p ~/.claude/skills
|
||||
|
||||
# Clone repository
|
||||
git clone https://github.com/obra/superpowers.git ~/.claude/skills/superpowers
|
||||
```
|
||||
|
||||
### Method 3: Project-Level Installation
|
||||
|
||||
If you want to use Superpowers in a specific project:
|
||||
|
||||
```bash
|
||||
# In project root
|
||||
mkdir -p .claude/skills
|
||||
|
||||
# Clone or copy superpowers
|
||||
cp -r ~/.claude/skills/superpowers .claude/skills/
|
||||
```
|
||||
|
||||
This allows team members to share the same Superpowers configuration.
|
||||
|
||||
---
|
||||
|
||||
## Common Skills Quick Reference
|
||||
|
||||
| Skill Name | Function | Use Case |
|
||||
|---------|------|---------|
|
||||
| `brainstorming` | Clarify requirements through Socratic questioning | When requirements are unclear |
|
||||
| `writing-plans` | Break tasks into small steps | Before starting large projects |
|
||||
| `executing-plans` | Execute plan with checkpoints | During plan-driven development |
|
||||
| `test-driven-development` | TDD red-green-refactor loop | For all feature development |
|
||||
| `systematic-debugging` | Four-stage root-cause analysis | When bugs appear |
|
||||
| `verification-before-completion` | Pre-completion verification | At task completion |
|
||||
| `requesting-code-review` | Request code review | Before code submission |
|
||||
| `subagent-driven-development` | Launch an isolated sub-agent for each small task | Parallel tasks |
|
||||
| `using-git-worktrees` | Git worktree isolation | Parallel feature development |
|
||||
|
||||
---
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Use Clear Trigger Keywords
|
||||
|
||||
Superpowers skills are triggered by keywords or by scenario (or invoked manually). Learn the common triggers:
|
||||
|
||||
| Skill | Trigger Keywords |
|
||||
|------|-----------|
|
||||
| `test-driven-development` | "TDD", "test-driven", "write tests first" |
|
||||
| `brainstorming` | Auto-triggered when requirements are unclear |
|
||||
| `systematic-debugging` | "debug", "bug", "not working" |
|
||||
| `writing-plans` | "make a plan", "planning" |
|
||||
|
||||
### 2. Use Superpowers When Process Discipline Is Needed
|
||||
|
||||
- Production-grade code development -> mention "TDD"
|
||||
- Requirements are unclear -> let `brainstorming` clarify
|
||||
- Complex project -> use `writing-plans` to decompose tasks
|
||||
|
||||
### 3. Do Not Force It for Simple Tasks
|
||||
|
||||
If it is a rapid prototype or one-off script, you do not need the full process. Superpowers is most suitable for code requiring long-term maintenance.
|
||||
|
||||
### 4. Skills Can Be Combined
|
||||
|
||||
```text
|
||||
Implement user authentication with TDD, and after completion, help me do a code review
|
||||
```
|
||||
|
||||
This triggers both the `test-driven-development` and `requesting-code-review` skills.
|
||||
|
||||
---
|
||||
|
||||
## Frequently Asked Questions
|
||||
|
||||
### Q1: Do I have to specify "TDD" when using Superpowers?
|
||||
|
||||
**Not required**.
|
||||
|
||||
Superpowers is a skill set, and each skill has its own trigger conditions:
|
||||
- Say "use TDD" -> triggers `test-driven-development`
|
||||
- Do not say TDD -> Claude may write tests or not (depends on model behavior)
|
||||
|
||||
Superpowers exists to **enforce process discipline**, not to create capability from nothing.
|
||||
|
||||
### Q2: Does Superpowers make development slower?
|
||||
|
||||
At first, it may feel slower because:
|
||||
- requirement clarification takes time
|
||||
- tests are written before code
|
||||
- code review is required
|
||||
|
||||
But in the long run, overall efficiency improves due to reduced rework and fewer bugs.
|
||||
|
||||
### Q3: Do small projects also need Superpowers?
|
||||
|
||||
For prototype validation or very simple tasks, you can use Claude Code directly. Superpowers is better suited for:
|
||||
- production-grade projects
|
||||
- multi-person collaboration
|
||||
- long-term maintainability
|
||||
|
||||
### Q4: What is the difference between Superpowers and Skills?
|
||||
|
||||
| Dimension | Superpowers | Skills |
|
||||
|------|-------------|--------|
|
||||
| **Nature** | Complete development methodology framework | Reusable skill packages |
|
||||
| **Scope** | Covers the full development process | Focuses on specific functions |
|
||||
| **Relationship** | Superpowers is built from Skills | Skills are the building blocks that Superpowers bundles together |
|
||||
|
||||
### Q5: Can I customize Superpowers skills?
|
||||
|
||||
Yes. Superpowers is open-source, and you can:
|
||||
1. Fork the repository
|
||||
2. Modify existing skills
|
||||
3. Add new skills
|
||||
4. Contribute back to the community
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Official Resources
|
||||
|
||||
- [obra/superpowers GitHub](https://github.com/obra/superpowers) - official repository (50,000+ ⭐)
|
||||
- [Detailed Superpowers Usage Tutorial](https://www.cnblogs.com/gyc567/p/19510203) - detailed Chinese tutorial
|
||||
- [Superpowers Environment Setup Guide](https://m.blog.csdn.net/gitblog_00683/article/details/144768992) - setup guide
|
||||
|
||||
### Community Resources
|
||||
|
||||
| Repository | Description |
|
||||
|------|------|
|
||||
| [affaan-m/everything-claude-code](https://github.com/affaan-m/everything-claude-code) | comprehensive toolkit including TDD workflows |
|
||||
| [shanraisshan/claude-code-best-practice](https://github.com/shanraisshan/claude-code-best-practice) | community-maintained collection of best practices |
|
||||
|
||||
### Related Articles
|
||||
|
||||
- [Goodbye Vibe Coding! Use Superpowers to Make Claude Code Write Engineering-Grade Code](https://juejin.cn/post/7593573617648123956)
|
||||
- [How I Use Superpowers MCP to Force Claude Code to Plan Before Coding](https://juejin.cn/post/7570341520551673871)
|
||||
- [Claude Code + Superpowers Beginner Tutorial](https://juejin.cn/post/7594832320030638123)
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
Superpowers is a set of **engineering-grade development skills** that upgrades Claude Code from a "smart intern" to a "disciplined development team."
|
||||
|
||||
### Core Takeaways
|
||||
|
||||
1. **Superpowers is a skill set, not magic**
|
||||
- After installation, skills are available in the background
|
||||
- Triggered via keywords or scenarios
|
||||
- You can manually invoke specific skills
|
||||
|
||||
2. **Remember key trigger phrases**
|
||||
- Want TDD -> say "use TDD"
|
||||
- Vague requirements -> `brainstorming` asks proactively
|
||||
- Bug appears -> mention "debug" to trigger `systematic-debugging`
|
||||
|
||||
3. **Best-fit scenarios**
|
||||
- ✅ Production-grade code development
|
||||
- ✅ Long-term maintainable projects
|
||||
- ✅ Team collaboration projects
|
||||
- ❌ Rapid prototypes (optional)
|
||||
- ❌ One-off scripts (optional)
|
||||
|
||||
Remember: **Superpowers does not make AI smarter; it makes AI more disciplined.**
|
||||
@@ -0,0 +1,943 @@
|
||||
# AI-Assisted Development Workflow
|
||||
|
||||
In the previous chapters, we learned how to use AI IDEs to write code, how to manage code versions with Git, and how to design and implement APIs. But when you face a real development task, you may run into questions like these:
|
||||
|
||||
- "This project has thousands of files. Where should I start?"
|
||||
- "My boss asked me to add a new feature, but I'm not familiar with this part of the codebase."
|
||||
- "I have no idea where this bug is. There is just too much code."
|
||||
- "I need to refactor this pile of code, but I'm afraid of breaking something."
|
||||
|
||||
The essence of these questions is: **how do you use AI tools efficiently in real development scenarios to get work done?**
|
||||
|
||||
In this lesson, we will learn how to build a systematic AI-assisted development workflow so that you can use AI efficiently across different development scenarios. Through concrete examples, we will show how to use AI in new feature development, bug fixing, code refactoring, and more.
|
||||
|
||||
> 💡 **Prerequisites**
|
||||
>
|
||||
> Before studying this section, it is recommended that you first understand:
|
||||
> - [AI IDE Basics](../../stage-1/ai-ide/) - master the basic use of AI IDEs
|
||||
> - [Git and GitHub Workflow](../../stage-2/backend/2.4-git-workflow/) - understand code version management
|
||||
> - [Using Large Models to Help Write API Code](../../stage-2/backend/2.3-ai-interface-code/) - understand the basic concept of AI-assisted development
|
||||
|
||||
::: info 📚 What you will learn
|
||||
|
||||
1. Understand AI's role in the development process and its capability boundaries
|
||||
2. Master AI-assisted development strategies for different project types
|
||||
3. Learn how to use Claude Code in scenarios such as new feature development, bug fixing, and code refactoring
|
||||
4. Build a project knowledge base to improve collaboration efficiency with Claude Code
|
||||
5. Master practical techniques for improving AI collaboration efficiency
|
||||
|
||||
:::
|
||||
|
||||
# 1. Understand AI's Capability Boundaries
|
||||
|
||||
Before we start using AI to assist development, we first need to understand what AI can and cannot do. Only then can we build the right collaboration model.
|
||||
|
||||
## 1.1 What AI Is Good At
|
||||
|
||||
Think of AI as a very smart assistant that still needs clear instructions. It can quickly generate a code skeleton based on your description, and it can also read thousands of lines of code in seconds to find the part you need. If there are obvious syntax errors or common security vulnerabilities, it can help you discover them too. Repetitive tasks such as batch-renaming variables, formatting code, and generating documentation comments are especially suitable to hand over to AI.
|
||||
|
||||
Put simply, AI is good at work that has clear rules and can be automated.
|
||||
|
||||
## 1.2 What AI Is Not Good At
|
||||
|
||||
But AI also has its limitations. It does not understand your business logic. Unless you tell it in detail, it will not know how your company's order flow works. It also cannot make decisions such as technical selection or architecture design that require weighing trade-offs, because those depend on your experience and understanding of the project. AI also does not know your team's special conventions, such as "all APIs must have logging" or "error codes must use enums." You need to configure those rules or tell it explicitly.
|
||||
|
||||
Most importantly, AI-generated code should never be used as-is: you must review and test it. It may look correct while actually being wrong, and it may ignore certain edge cases.
|
||||
|
||||
## 1.3 How to Collaborate with AI
|
||||
|
||||
Once you understand AI's capability boundaries, the collaboration model becomes clear: you are responsible for deciding what to build, making decisions, and ensuring quality; AI is responsible for executing concrete coding work, finding information, and surfacing obvious problems.
|
||||
|
||||
It is like working with a junior developer. You tell them what needs to be done, they implement it, and then you review the code. The difference is that AI executes much faster, but its judgment is weaker than a human's.
|
||||
|
||||
# 2. Development Strategies for Different Project Types
|
||||
|
||||
Different types of projects require different development styles and AI usage strategies. Choosing the right strategy can greatly improve development efficiency.
|
||||
|
||||
## 2.1 Brand-New Projects (Starting from Scratch)
|
||||
|
||||
**Project characteristics:**
|
||||
- No historical baggage, so you can design freely
|
||||
- You need to establish project structure and code conventions
|
||||
- Suitable for fast iteration and trial-and-error
|
||||
|
||||
**Recommended workflow:**
|
||||
|
||||
**Step 1: Plan the project structure**
|
||||
|
||||
Before you start coding, first ask AI to help you plan the project structure and technical choices:
|
||||
|
||||
```text
|
||||
I want to build a task management app with these features:
|
||||
- User registration and login
|
||||
- Create, edit, and delete tasks
|
||||
- Task categories and tags
|
||||
- Task reminders
|
||||
|
||||
Please help me:
|
||||
1. Recommend a suitable tech stack
|
||||
2. Design the project directory structure
|
||||
3. Plan the database schema
|
||||
```
|
||||
|
||||
**Step 2: Build the basic framework**
|
||||
|
||||
Based on the plan, ask AI to create the basic project structure:
|
||||
|
||||
```text
|
||||
Based on the plan we just discussed, help me:
|
||||
1. Create the project directory structure
|
||||
2. Initialize config files (package.json, .env, etc.)
|
||||
3. Create the basic server code
|
||||
```
|
||||
|
||||
**Step 3: Implement features one by one**
|
||||
|
||||
Implement feature modules one at a time by priority:
|
||||
|
||||
```text
|
||||
Now implement the user registration feature with these requirements:
|
||||
- Register with email and password
|
||||
- Store passwords in encrypted form
|
||||
- Email verification
|
||||
```
|
||||
|
||||
**Key points:**
|
||||
- Establish code conventions early so AI generates code that follows them
|
||||
- Test and verify every feature module as soon as it is completed
|
||||
- Keep project documentation updated in time
|
||||
|
||||
## 2.2 Mature Projects (Large Existing Codebases)
|
||||
|
||||
**Project characteristics:**
|
||||
- Large codebase with historical conventions
|
||||
- You need to keep coding style consistent
|
||||
- Changes must consider the scope of impact
|
||||
|
||||
**Recommended workflow:**
|
||||
|
||||
**Step 1: Understand the project structure**
|
||||
|
||||
Before changing code, first ask AI to help you understand the project:
|
||||
|
||||
```text
|
||||
This is an e-commerce project, and I need to add a coupon feature.
|
||||
Please help me:
|
||||
1. Analyze the overall project structure
|
||||
2. Find the order-related code
|
||||
3. See how other similar features are implemented
|
||||
```
|
||||
|
||||
**Step 2: Find reference code**
|
||||
|
||||
Ask AI to find similar implementations in the project as references:
|
||||
|
||||
```text
|
||||
Find how other promotional features in the project, such as spend-threshold promotions ("spend X, save Y") and discounts, are implemented
|
||||
```
|
||||
|
||||
**Step 3: Follow the existing style**
|
||||
|
||||
Ask AI to implement the new feature in the style of the existing code:
|
||||
|
||||
```text
|
||||
Please implement the coupon feature by referring to how the spend-threshold ("spend X, save Y") promotion is implemented.
|
||||
Keep the same code style and directory structure.
|
||||
```
|
||||
|
||||
**Key points:**
|
||||
- Understand first, then change things, so you do not damage the existing architecture
|
||||
- Keep coding style consistent
|
||||
- Test related functionality after the change
|
||||
|
||||
## 2.3 Rapid Prototypes (Validating Ideas)
|
||||
|
||||
**Project characteristics:**
|
||||
- Speed matters most; code quality matters less
|
||||
- Used to validate product ideas or technical approaches
|
||||
- May later be discarded or rewritten
|
||||
|
||||
**Recommended workflow:**
|
||||
|
||||
**Describe the requirement directly and implement quickly:**
|
||||
|
||||
```text
|
||||
Build a simple todo app with these requirements:
|
||||
- Add, delete, and mark tasks as completed
|
||||
- Store data locally
|
||||
- Keep the UI simple, as long as it works
|
||||
```
|
||||
|
||||
**Iterate quickly:**
|
||||
|
||||
```text
|
||||
Add search
|
||||
Switch it to a dark theme
|
||||
Add task categories
|
||||
```
|
||||
|
||||
**Key points:**
|
||||
- Do not worry too much about code quality or conventions
|
||||
- Validate ideas quickly and adjust direction in time
|
||||
- If the prototype succeeds, it will need refactoring later
|
||||
|
||||
## 2.4 Maintenance Projects (Mostly Bug Fixes)
|
||||
|
||||
**Project characteristics:**
|
||||
- The code is already stable, and the main task is fixing issues
|
||||
- You need to locate problems quickly
|
||||
- Changes must be made carefully to avoid introducing new issues
|
||||
|
||||
**Recommended workflow:**
|
||||
|
||||
**Step 1: Locate the problem**
|
||||
|
||||
```text
|
||||
User feedback: after clicking the "Submit Order" button, the page freezes
|
||||
Console error: TypeError: Cannot read property 'id' of undefined
|
||||
|
||||
Please help me:
|
||||
1. Analyze possible causes
|
||||
2. Find the relevant code
|
||||
```
|
||||
|
||||
**Step 2: Analyze the root cause**
|
||||
|
||||
```text
|
||||
Check in what situations this error occurs
|
||||
Inspect the data flow
|
||||
```
|
||||
|
||||
**Step 3: Apply the fix**
|
||||
|
||||
```text
|
||||
Fix this problem, and:
|
||||
1. Add defensive code to avoid similar issues
|
||||
2. Add error messages to improve user experience
|
||||
```
|
||||
|
||||
**Key points:**
|
||||
- Test thoroughly after the fix to ensure it does not affect other functionality
|
||||
- Add defensive code to improve system robustness
|
||||
- Record the problem and solution for future reference
|
||||
|
||||
# 3. Workflows for Common Development Tasks
|
||||
|
||||
In day-to-day development, we encounter many different types of tasks. Below are several of the most common AI-assisted workflows.
|
||||
|
||||
## 3.1 Developing a New Feature
|
||||
|
||||
**Scenario:** the product manager gives you a new requirement, and you need to implement a new feature.
|
||||
|
||||
**Complete workflow:**
|
||||
|
||||
**Step 1: Understand the requirement** (done by you)
|
||||
|
||||
Before you start coding, first clarify:
|
||||
- What feature needs to be implemented?
|
||||
- What are the inputs and outputs?
|
||||
- What are the edge conditions and exceptional cases?
|
||||
- What are the performance and security requirements?
|
||||
|
||||
**Step 2: Understand the existing code** (ask AI to help)
|
||||
|
||||
```text
|
||||
I need to implement a user comment feature. Please help me:
|
||||
1. Check whether there is anything similar in the project
|
||||
2. Find how user data and article data are stored
|
||||
3. Understand the database operation conventions in this project
|
||||
```
|
||||
|
||||
**Step 3: Make an implementation plan** (with AI)
|
||||
|
||||
```text
|
||||
Based on the analysis we just did, implementing the comment feature requires:
|
||||
1. Creating a comment data model
|
||||
2. Implementing CRUD APIs for comments
|
||||
3. Adding comment display and posting functionality to the frontend
|
||||
|
||||
Please help me confirm whether this plan is reasonable and whether anything is missing
|
||||
```
|
||||
|
||||
**Step 4: Implement the feature** (AI generates, you review)
|
||||
|
||||
```text
|
||||
According to the plan, first implement the comment data model.
|
||||
Requirements:
|
||||
- Include comment content, author, article ID, and creation time
|
||||
- Support nested comment replies
|
||||
- Add necessary indexes
|
||||
```
|
||||
|
||||
```text
|
||||
Now implement the comment API endpoints.
|
||||
Requirements:
|
||||
- Post a comment
|
||||
- Delete a comment (only your own comments can be deleted)
|
||||
- Get all comments for an article
|
||||
- Support pagination
|
||||
```
|
||||
|
||||
```text
|
||||
Finally implement the frontend comment component.
|
||||
Requirements:
|
||||
- Show the comment list
|
||||
- Comment posting form
|
||||
- Support replying to comments
|
||||
```
|
||||
|
||||
**Step 5: Test and verify** (done by you)
|
||||
|
||||
- Manually test the main flow
|
||||
- Test edge cases such as empty content and overly long content
|
||||
- Test permission control such as being unable to delete someone else's comment
|
||||
- Ask AI to help you write automated tests
|
||||
|
||||
**Real conversation example:**
|
||||
|
||||
```text
|
||||
You: I want to add a user comment feature so users can post comments on articles
|
||||
|
||||
AI: Okay, let me first understand the project structure.
|
||||
[AI analyzes the project code]
|
||||
I see that the project already has a user system (models/user.js)
|
||||
and an article system (models/article.js).
|
||||
|
||||
Implementing the comment feature requires:
|
||||
1. Creating a comment data model (models/comment.js)
|
||||
2. Adding comment-related APIs (routes/comment.js)
|
||||
3. Adding comment display and posting functionality to the article detail page
|
||||
|
||||
Should I start implementing it?
|
||||
|
||||
You: Yes, let's start. Create the data model first
|
||||
|
||||
AI: [Generates models/comment.js]
|
||||
I have created the comment data model with these fields:
|
||||
- id: comment ID
|
||||
- content: comment content
|
||||
- userId: author ID
|
||||
- articleId: article ID
|
||||
- parentId: parent comment ID (for nested replies)
|
||||
- createdAt: creation time
|
||||
|
||||
Should I implement the API next?
|
||||
|
||||
You: Yes, implement the API
|
||||
```
|
||||
|
||||
## 3.2 Fixing Bugs
|
||||
|
||||
**Scenario:** a user reports a problem, and you need to locate and fix it quickly.
|
||||
|
||||
**Complete workflow:**
|
||||
|
||||
**Step 1: Gather information** (done by you)
|
||||
|
||||
Record the following information:
|
||||
- What is the error message?
|
||||
- How can this problem be reproduced?
|
||||
- When did it start happening?
|
||||
- How widespread is the impact?
|
||||
|
||||
**Step 2: Locate the problem** (ask AI to help)
|
||||
|
||||
```text
|
||||
User feedback: after clicking the "Submit Order" button, the page freezes
|
||||
Browser console error: TypeError: Cannot read property 'id' of undefined
|
||||
The error occurs on line 45 of checkout.js
|
||||
|
||||
Please help me:
|
||||
1. Analyze possible causes
|
||||
2. Find the relevant code
|
||||
3. Check the data flow
|
||||
```
|
||||
|
||||
**Step 3: Analyze the root cause** (with AI)
|
||||
|
||||
```text
|
||||
Check where this user object comes from
|
||||
Inspect under what circumstances user could be undefined
|
||||
```
|
||||
|
||||
**Step 4: Implement the fix** (AI generates, you review)
|
||||
|
||||
```text
|
||||
Fix this problem with these requirements:
|
||||
1. Add defensive code to check whether user exists
|
||||
2. If user does not exist, redirect to the login page
|
||||
3. Add a friendly error message
|
||||
```
|
||||
|
||||
**Step 5: Verify the fix** (done by you)
|
||||
|
||||
- Reproduce the issue using the original steps and confirm the problem is solved
|
||||
- Test related functionality to ensure no new problems were introduced
|
||||
- Think about whether similar problems exist elsewhere
|
||||
|
||||
**Real conversation example:**
|
||||
|
||||
```text
|
||||
You: A user reported that after logging in, refreshing the page logs them out
|
||||
|
||||
AI: This is usually a token storage or validation issue. Let me check:
|
||||
1. Where is the token stored, localStorage or cookies?
|
||||
2. Is the token reloaded when the page refreshes?
|
||||
|
||||
Please tell me which file contains the token-related code
|
||||
|
||||
You: utils/auth.js
|
||||
|
||||
AI: [Analyzes utils/auth.js]
|
||||
I found the issue: the token is stored in sessionStorage,
|
||||
and sessionStorage is cleared when the page is refreshed.
|
||||
|
||||
Solution: use localStorage instead
|
||||
|
||||
Would you like me to change it?
|
||||
|
||||
You: Yes, change it
|
||||
|
||||
AI: [Modifies the code]
|
||||
The change is complete. The token is now stored in localStorage,
|
||||
so it will not be lost after a page refresh.
|
||||
|
||||
I recommend that you test:
|
||||
1. Log in and refresh the page to confirm the user stays logged in
|
||||
2. Close and reopen the browser to confirm the user stays logged in
|
||||
```
|
||||
|
||||
## 3.3 Refactoring Code
|
||||
|
||||
**Scenario:** the code has become hard to maintain, and it needs refactoring to improve code quality.
|
||||
|
||||
**Complete workflow:**
|
||||
|
||||
**Step 1: Define the refactoring goal** (done by you)
|
||||
|
||||
Clarify the following:
|
||||
- What problem are you trying to solve? (duplicate code? messy structure? performance issues?)
|
||||
- How large is the scope of the refactor?
|
||||
- What level of risk is acceptable?
|
||||
|
||||
**Step 2: Make a refactoring plan** (with AI)
|
||||
|
||||
```text
|
||||
These three functions (getUserList, getArticleList, getCommentList)
|
||||
contain a lot of duplicated code for pagination, sorting, and filtering.
|
||||
|
||||
Please help me:
|
||||
1. Analyze the duplicated parts
|
||||
2. Propose a refactoring plan
|
||||
3. Evaluate the risk of the refactor
|
||||
```
|
||||
|
||||
**Step 3: Prepare a safety net** (ask AI to help)
|
||||
|
||||
```text
|
||||
Before refactoring, please write test cases for me first
|
||||
so that we can verify the behavior stays the same after the refactor
|
||||
```
|
||||
|
||||
**Step 4: Refactor step by step** (AI executes, you verify)
|
||||
|
||||
```text
|
||||
According to the plan we just discussed, first extract the shared function
|
||||
```
|
||||
|
||||
```text
|
||||
Now optimize the data structure
|
||||
```
|
||||
|
||||
```text
|
||||
Finally update the call sites
|
||||
```
|
||||
|
||||
**Step 5: Verify the result** (done by you)
|
||||
|
||||
- Run all tests and make sure they pass
|
||||
- Manually test key functionality
|
||||
- Check whether code quality has improved
|
||||
|
||||
**Real conversation example:**
|
||||
|
||||
```text
|
||||
You: These three functions have a lot of duplicate code. Can you help me refactor them?
|
||||
[Paste the code of the three functions]
|
||||
|
||||
AI: I analyzed them, and the main differences are:
|
||||
- getUserList: gets the user list
|
||||
- getArticleList: gets the article list
|
||||
- getCommentList: gets the comment list
|
||||
|
||||
Their common logic is pagination, sorting, and filtering.
|
||||
|
||||
Refactoring plan:
|
||||
1. Extract a generic getList function
|
||||
2. Pass the different parts as parameters
|
||||
|
||||
This can eliminate about 80% of the duplicated code.
|
||||
|
||||
Should I implement it?
|
||||
|
||||
You: Yes, but first help me write test cases
|
||||
|
||||
AI: [Generates tests]
|
||||
I have written tests for all three functions,
|
||||
covering the main use cases.
|
||||
|
||||
Can I start refactoring now?
|
||||
|
||||
You: Yes, start refactoring
|
||||
|
||||
AI: [Refactors the code]
|
||||
Refactoring is complete. I created a generic getList function,
|
||||
and the original three functions now all call that shared function.
|
||||
|
||||
Please run the tests to confirm everything works: npm test
|
||||
```
|
||||
|
||||
## 3.4 Code Review
|
||||
|
||||
**Scenario:** a teammate submitted code, and you need to review it.
|
||||
|
||||
**Complete workflow:**
|
||||
|
||||
**Step 1: Quickly understand the change** (ask AI to help)
|
||||
|
||||
```text
|
||||
Summarize what this PR changed
|
||||
Which files were mainly modified
|
||||
```
|
||||
|
||||
**Step 2: Check common issues** (ask AI to help)
|
||||
|
||||
```text
|
||||
Check this change for:
|
||||
1. Security issues (SQL injection, XSS, etc.)
|
||||
2. Performance problems (N+1 queries, memory leaks, etc.)
|
||||
3. Obvious bugs
|
||||
4. Whether the coding style follows the project conventions
|
||||
```
|
||||
|
||||
**Step 3: Deep review** (done by you)
|
||||
|
||||
- Whether the business logic is correct
|
||||
- Whether edge cases were considered
|
||||
- Whether there is a better implementation approach
|
||||
- Whether tests should be added
|
||||
|
||||
**Step 4: Give feedback** (done by you)
|
||||
|
||||
- Point out the places that need changes
|
||||
- Explain why they should be changed
|
||||
- Provide improvement suggestions
|
||||
|
||||
**Real conversation example:**
|
||||
|
||||
```text
|
||||
You: Help me review this PR
|
||||
[Provide the PR link or code]
|
||||
|
||||
AI: I reviewed the change and found these issues:
|
||||
|
||||
1. Security issues:
|
||||
- Line 23: user input is not escaped, which may lead to XSS risk
|
||||
- Line 45: the SQL query is built by string concatenation, which risks injection
|
||||
|
||||
2. Performance problems:
|
||||
- Line 67: the database is queried inside a loop, which creates an N+1 problem
|
||||
|
||||
3. Code quality:
|
||||
- The handleSubmit function is too long (200 lines); consider splitting it
|
||||
- Error handling is missing
|
||||
|
||||
Would you like me to explain any of these in more detail?
|
||||
|
||||
You: Explain the SQL injection issue in detail
|
||||
|
||||
AI: On line 45:
|
||||
~~~javascript
|
||||
const query = `SELECT * FROM users WHERE name = '${userName}'`;
|
||||
~~~
|
||||
|
||||
This directly concatenates user input into the SQL statement.
|
||||
If a user enters `' OR '1'='1`, it becomes:
|
||||
~~~sql
|
||||
SELECT * FROM users WHERE name = '' OR '1'='1'
|
||||
~~~
|
||||
This would return all user data.
|
||||
|
||||
The correct approach is to use a parameterized query:
|
||||
~~~javascript
|
||||
const query = 'SELECT * FROM users WHERE name = ?';
|
||||
db.query(query, [userName]);
|
||||
~~~
|
||||
|
||||
Would you like me to fix it?
|
||||
```
|
||||
|
||||
# 4. Build a Project Knowledge Base
|
||||
|
||||
To help AI understand your project better, it is recommended to build a knowledge base inside the project. That way AI can work according to your conventions and habits.
|
||||
|
||||
## 4.1 Create a Project Description File
|
||||
|
||||
Create a `CLAUDE.md` or `AGENTS.md` file in the project root to record key project information:
|
||||
|
||||
```markdown
|
||||
# Project Overview
|
||||
|
||||
## Project Summary
|
||||
This is an online learning platform that provides course management, user learning, assignment submission, and other features.
|
||||
|
||||
## Tech Stack
|
||||
- Frontend: React 18 + TypeScript + Vite
|
||||
- Backend: Node.js + Express + PostgreSQL
|
||||
- Deployment: Vercel (frontend) + Railway (backend)
|
||||
|
||||
## Project Structure
|
||||
~~~
|
||||
src/
|
||||
├── components/ # React components
|
||||
├── pages/ # Page components
|
||||
├── api/ # API calls
|
||||
├── utils/ # Utility functions
|
||||
└── types/ # TypeScript type definitions
|
||||
~~~
|
||||
|
||||
## Code Conventions
|
||||
- Use ESLint and Prettier to format code
|
||||
- Component files use PascalCase (such as UserProfile.tsx)
|
||||
- Utility functions use camelCase (such as formatDate.ts)
|
||||
- Constants use UPPER_SNAKE_CASE (such as API_BASE_URL)
|
||||
|
||||
## Development Flow
|
||||
1. Create a feature branch from main
|
||||
2. Submit a PR after development is complete
|
||||
3. Merge after code review passes
|
||||
|
||||
## Common Tasks
|
||||
- Start the development server: `npm run dev`
|
||||
- Run tests: `npm test`
|
||||
- Build for production: `npm run build`
|
||||
- Format code: `npm run format`
|
||||
|
||||
## Notes
|
||||
- All API calls must include error handling
|
||||
- User input must be validated and escaped
|
||||
- Use parameterized queries for database operations to avoid SQL injection
|
||||
- Sensitive information (passwords, tokens) must not be written to logs
|
||||
|
||||
## Database Schema
|
||||
- users: user table (id, email, password_hash, created_at)
|
||||
- courses: course table (id, title, description, teacher_id)
|
||||
- enrollments: enrollment table (id, user_id, course_id, enrolled_at)
|
||||
```
|
||||
|
||||
## 4.2 Record Common Problems and Solutions
|
||||
|
||||
Create `docs/troubleshooting.md` in the project to record common problems:
|
||||
|
||||
```markdown
|
||||
# Common Problems
|
||||
|
||||
## Development Environment Problems
|
||||
|
||||
### Problem: npm install fails
|
||||
**Cause:** Node version is incompatible
|
||||
**Solution:** Use Node.js 18 or higher
|
||||
|
||||
### Problem: database connection fails
|
||||
**Cause:** environment variables are not configured
|
||||
**Solution:** Copy .env.example to .env and fill in the database connection info
|
||||
|
||||
## Feature Problems
|
||||
|
||||
### Problem: after users log in, refreshing the page logs them out
|
||||
**Cause:** the token is stored in sessionStorage
|
||||
**Solution:** switch to localStorage
|
||||
|
||||
### Problem: image upload fails
|
||||
**Cause:** file size exceeds the limit
|
||||
**Solution:** add a file size check on the frontend and limit it to 5MB
|
||||
```
|
||||
|
||||
## 4.3 Maintain Technical Decision Records
|
||||
|
||||
Create a `docs/decisions/` directory to record important technical decisions:
|
||||
|
||||
```markdown
|
||||
# ADR-001: Choosing PostgreSQL as the Database
|
||||
|
||||
## Status
|
||||
Accepted
|
||||
|
||||
## Background
|
||||
The project needs to choose a relational database. The candidates are MySQL and PostgreSQL.
|
||||
|
||||
## Decision
|
||||
Choose PostgreSQL
|
||||
|
||||
## Rationale
|
||||
1. Better JSON support, suitable for storing course content
|
||||
2. Stronger full-text search
|
||||
3. The team is more familiar with PostgreSQL
|
||||
|
||||
## Consequences
|
||||
- We need to learn PostgreSQL-specific features
|
||||
- Deployment requires a PostgreSQL environment
|
||||
```
|
||||
|
||||
# 5. Techniques for Improving AI Collaboration Efficiency
|
||||
|
||||
By mastering some practical techniques, you can make your collaboration with AI more efficient.
|
||||
|
||||
## 5.1 Be Clear and Specific When Describing Problems
|
||||
|
||||
**Bad description:**
|
||||
```text
|
||||
This feature has a problem
|
||||
Help me optimize it
|
||||
```
|
||||
|
||||
**Good description:**
|
||||
```text
|
||||
After the user clicks the "Submit" button, the form is not submitted
|
||||
The browser console reports: Uncaught TypeError: Cannot read property 'value' of null
|
||||
The error occurs on line 23 of form.js
|
||||
|
||||
This list loads very slowly and has 1000 items
|
||||
Please help me add pagination with 20 items per page
|
||||
```
|
||||
|
||||
**Key points:**
|
||||
- Provide specific error information
|
||||
- Explain the expected result
|
||||
- Give relevant context
|
||||
|
||||
## 5.2 Do Only One Thing at a Time
|
||||
|
||||
**Bad approach:**
|
||||
```text
|
||||
Help me implement login, registration, password recovery, profile center,
|
||||
password change, and email verification
|
||||
```
|
||||
|
||||
**Good approach:**
|
||||
```text
|
||||
Implement the login feature first, with these requirements:
|
||||
- Email and password login
|
||||
- Remember login state
|
||||
- Error messages
|
||||
|
||||
(After it is done) Now implement the registration feature
|
||||
|
||||
(After it is done) Now implement the password recovery feature
|
||||
```
|
||||
|
||||
**Key points:**
|
||||
- Break large tasks into small tasks
|
||||
- Test and verify after every completed task
|
||||
- Confirm there are no issues before moving to the next one
|
||||
|
||||
## 5.3 Verify Results Promptly
|
||||
|
||||
**Bad approach:**
|
||||
- Let AI modify 10 files in a row
|
||||
- Only discover at the end that the first change was already wrong
|
||||
- Waste a lot of time
|
||||
|
||||
**Good approach:**
|
||||
- Modify one file and test immediately
|
||||
- Confirm there is no problem, then continue
|
||||
- Correct issues as soon as they are found
|
||||
|
||||
**Key points:**
|
||||
- Move in small steps and get fast feedback
|
||||
- Do not blindly trust AI
|
||||
- Stay in control of the code
|
||||
|
||||
## 5.4 Make Good Use of Context
|
||||
|
||||
**Technique 1: refer to previous conversation**
|
||||
```text
|
||||
Implement according to the plan we just discussed
|
||||
Refer to the previous getUserList function
|
||||
```
|
||||
|
||||
**Technique 2: provide related code**
|
||||
```text
|
||||
This is the existing user model code:
|
||||
[paste code]
|
||||
|
||||
Please implement the article model in the same style
|
||||
```
|
||||
|
||||
**Technique 3: explain project background**
|
||||
```text
|
||||
This is an e-commerce project using React + Node.js
|
||||
It already has a user system and a product system
|
||||
Now we need to add a shopping cart feature
|
||||
```
|
||||
|
||||
## 5.5 Save Useful Conversations
|
||||
|
||||
**Scenario:** you solved a complex problem
|
||||
|
||||
**How to do it:**
|
||||
1. Record the solution in project documentation
|
||||
2. Refer to it the next time a similar issue appears
|
||||
3. Share it with other team members
|
||||
|
||||
**Example:**
|
||||
|
||||
Create a document under `docs/solutions/`:
|
||||
|
||||
```markdown
|
||||
# Solving the N+1 Query Problem
|
||||
|
||||
## Problem Description
|
||||
When fetching the article list, the system queries the author information once per article,
|
||||
which causes a performance problem.
|
||||
|
||||
## Solution
|
||||
Use a JOIN query to fetch all the data in one go:
|
||||
|
||||
~~~sql
|
||||
SELECT articles.*, users.name as author_name
|
||||
FROM articles
|
||||
LEFT JOIN users ON articles.author_id = users.id
|
||||
~~~
|
||||
|
||||
**Result:** query time dropped from 2000ms to 50ms
|
||||
```
|
||||
|
||||
## 5.6 Learn the Art of Asking Questions
|
||||
|
||||
**Technique 1: ask "why" first**
|
||||
```text
|
||||
Why does this code cause a memory leak?
|
||||
Why should we use useCallback instead of a normal function?
|
||||
```
|
||||
|
||||
**Technique 2: ask for multiple options**
|
||||
```text
|
||||
What are the different ways to implement user authentication?
|
||||
What are the pros and cons of each?
|
||||
```
|
||||
|
||||
**Technique 3: ask for explanations**
|
||||
```text
|
||||
How does this code work?
|
||||
Can you explain this algorithm in detail?
|
||||
```
|
||||
|
||||
# 6. Frequently Asked Questions
|
||||
|
||||
## Q1: Can I use AI-generated code directly?
|
||||
|
||||
**A:** No, not directly. It needs review and testing.
|
||||
|
||||
AI-generated code may have the following problems:
|
||||
- logical errors or poor handling of edge cases
|
||||
- failure to match the project's coding conventions
|
||||
- security risks
|
||||
- insufficient performance optimization
|
||||
|
||||
You need to:
|
||||
- carefully read the generated code
|
||||
- understand its logic
|
||||
- test different scenarios
|
||||
- confirm that it follows the project conventions
|
||||
|
||||
## Q2: What if AI misunderstands what I mean?
|
||||
|
||||
**A:** Correct it in time and describe the requirement again.
|
||||
|
||||
```text
|
||||
That's not what I meant. What I mean is...
|
||||
This understanding is incorrect. It should be...
|
||||
Let me describe the requirement again...
|
||||
```
|
||||
|
||||
If it is still wrong after several corrections, you can:
|
||||
- provide more context
|
||||
- give specific code examples
|
||||
- split the task into smaller pieces
|
||||
|
||||
## Q3: What if I run into something AI cannot solve?
|
||||
|
||||
**A:** AI is not all-powerful. Some problems still need you to solve them yourself.
|
||||
|
||||
Problems AI may not be able to solve:
|
||||
- very new technologies (AI knowledge has a cutoff date)
|
||||
- business logic unique to your team
|
||||
- problems that require access to external systems
|
||||
- complex performance optimization issues
|
||||
|
||||
At that point, you need to:
|
||||
- read the official documentation
|
||||
- search for related solutions
|
||||
- ask experienced teammates
|
||||
- ask in the community
|
||||
|
||||
## Q4: How do I judge whether AI's suggestion is reasonable?
|
||||
|
||||
**A:** Use your own experience and knowledge to judge it.
|
||||
|
||||
Evaluation criteria:
|
||||
- whether it follows best practices
|
||||
- whether it considers edge cases
|
||||
- whether there are potential security risks
|
||||
- whether it fits the project's tech stack
|
||||
- whether performance is acceptable
|
||||
|
||||
If you are not sure, you can:
|
||||
- ask AI to explain why it suggests that approach
|
||||
- ask for alternative solutions
|
||||
- consult team members
|
||||
|
||||
## Q5: How should a team use AI in collaboration?
|
||||
|
||||
**A:** Establish shared conventions and a shared knowledge base.
|
||||
|
||||
Recommendations for team collaboration:
|
||||
- share the project's `CLAUDE.md` configuration
|
||||
- unify code conventions and style
|
||||
- record solutions to common problems
|
||||
- regularly share useful prompts
|
||||
- check AI-generated code during code review
|
||||
|
||||
## Q6: How do I avoid becoming overly dependent on AI?
|
||||
|
||||
**A:** Keep learning and thinking. AI is an assistant, not a replacement.
|
||||
|
||||
Recommendations:
|
||||
- understand AI-generated code instead of copying it blindly
|
||||
- actively learn concepts you do not understand
|
||||
- regularly review foundational knowledge
|
||||
- try solving problems yourself first, then use AI to verify
|
||||
- participate in code review to learn from others' experience
|
||||
|
||||
# 7. Summary
|
||||
|
||||
Through this chapter, you have now mastered:
|
||||
|
||||
1. **AI's capability boundaries**: understand what AI is good at and not good at, and build the right collaboration model
|
||||
2. **Project-type strategies**: different development strategies for brand-new projects, mature projects, rapid prototypes, and maintenance projects
|
||||
3. **Common task workflows**: complete workflows for new feature development, bug fixing, code refactoring, and code review
|
||||
4. **Project knowledge base**: learn how to build project documentation so AI can understand your project better
|
||||
5. **Collaboration techniques**: practical ways to improve AI collaboration efficiency
|
||||
|
||||
**Key takeaways:**
|
||||
|
||||
- **Clear division of roles**: you make decisions and ensure quality, AI handles execution and assistance
|
||||
- **Clear communication**: be specific and do one thing at a time
|
||||
- **Verify promptly**: do not trust blindly, test and verify
|
||||
- **Keep learning**: understand AI's capability boundaries and continuously improve the collaboration model
|
||||
|
||||
Remember: AI is a tool, not a replacement. It can make you more efficient, but the final code quality still depends on your judgment. Start with simple tasks and gradually build trust. You will find that AI can save you a lot of time and let you focus on more valuable work.
|
||||
|
||||
::: tip 💡 Next step
|
||||
In the next chapter, we will learn how to use AI for code review and quality assurance to ensure code maintainability and security.
|
||||
:::
|
||||
@@ -0,0 +1,535 @@
|
||||
# How to Choose the Right Platform for Your Application
|
||||
|
||||
You have an idea and want to turn it into a real product. But with so many platform options - WeChat Mini Programs, iOS apps, Android apps, websites, browser extensions, desktop applications - where should you start?
|
||||
|
||||
::: tip 💡 Quick Navigation
|
||||
If you already know the characteristics of each platform, you can jump directly to [Section 2](#2-ask-yourself-three-questions-first) for the decision process, or see [the decision flowchart in Section 7](#7-summary-platform-selection-decision-flow).
|
||||
:::
|
||||
|
||||
This article will help you sort out your thinking and find the most suitable development platform based on your specific scenario.
|
||||
|
||||
## 1 Know These Platforms First
|
||||
|
||||
Before discussing "which one to choose," first understand "which ones exist." Below are the mainstream platform categories right now:
|
||||
|
||||
### 1.1 Mobile Platforms
|
||||
|
||||
#### iOS Native App
|
||||
|
||||
The apps you download from the App Store on your iPhone are iOS native apps. They launch quickly, feel smooth, and have full access to phone capabilities (camera, location, health data, etc.). The downside is that development requires a Mac, and every App Store release must pass Apple's review.
|
||||
|
||||
**Common examples**: WeChat, Douyin (TikTok China), Xiaohongshu, Keep, Meituan, Alipay
|
||||
|
||||
#### Android Native App
|
||||
|
||||
Apps downloaded from Android app stores, or installed from APK files sent by friends, are Android native apps. They are similar to iOS apps, but Android has more users worldwide and more distribution channels. The downside is device fragmentation: developers must adapt to many screen sizes and system versions.
|
||||
|
||||
**Common examples**: Tasker (automation), MX Player (video player), AirDroid (phone manager), Greenify (battery optimization), Xposed Framework (system customization)
|
||||
|
||||
#### WeChat Mini Program
|
||||
|
||||
The "small apps" you can use directly inside WeChat by scanning a code or searching by name, with no installation needed. The advantage is low user friction: everyone already has WeChat, so users can start instantly. The downside is limited capabilities, and it only runs inside WeChat.
|
||||
|
||||
**Common examples**: Pinduoduo (group-buy e-commerce), Meituan Waimai (local services), Mobike (bike sharing), Jump Jump (mini game), Zhouheiya (ordering/shopping)
|
||||
|
||||
#### PWA (Progressive Web App)
|
||||
|
||||
It sounds technical, but it's basically "a web page that can be installed like an app." When users open a site in a mobile browser, they may see "Add to Home Screen." After one tap, an icon appears on the home screen and the site behaves like an app. The advantage is one codebase for mobile and desktop. The downside is that many users do not know websites can be installed this way.
|
||||
|
||||
**Common examples**: Twitter Lite, Starbucks, Pinterest, Uber, Spotify Web Player
|
||||
|
||||
### 1.2 Desktop Platforms
|
||||
|
||||
#### Electron Desktop App
|
||||
|
||||
You might use them every day: VS Code, Slack, Discord, Notion, Figma - all built with Electron. The key feature is: build desktop software using web technologies (HTML, CSS, JavaScript), and run one codebase across Windows, Mac, and Linux. The downside is larger installers and higher runtime memory usage.
|
||||
|
||||
**Common examples**: VS Code, Slack, Discord, Notion, Figma, WeChat Developer Tools
|
||||
|
||||
#### Qt Desktop Application
|
||||
|
||||
If you have used WPS, VirtualBox, or OBS, you have already used software built with Qt. Qt is a C++ framework with good performance and stability, which makes it especially suitable for industrial scenarios. The trade-off is a steeper learning curve, because C++ knowledge is required.
|
||||
|
||||
**Common examples**: WPS Office, VirtualBox, Autodesk Maya, Telegram Desktop, OBS Studio
|
||||
|
||||
#### Native Desktop Application
|
||||
|
||||
"Heavyweight" applications such as office suites and professional creative tools are usually built with each operating system's native technologies: typically C# or C++ on Windows and Swift on macOS. They provide the best performance and smoothest experience, but the Windows and macOS versions must be developed separately, which is expensive.
|
||||
|
||||
**Common examples**: Microsoft Office, Adobe Photoshop, Final Cut Pro, WeChat (Windows/Mac), QQ Music
|
||||
|
||||
### 1.3 Web-Related Platforms
|
||||
|
||||
#### Website
|
||||
|
||||
These are pages opened by entering URLs in a browser. Advantages: accessible on any device (phone, computer, tablet), no installation required, and searchable by search engines. Downside: internet connection is required, so offline usage is unavailable.
|
||||
|
||||
**Common examples**: Taobao, Zhihu, GitHub, Bilibili, Juejin, CSDN
|
||||
|
||||
#### Browser Extension
|
||||
|
||||
Have you used ad blockers, translation tools, or password managers? These are browser extensions. They run inside the browser and can read and modify web page content. For example, after installing a translation extension you can translate English pages with one click. Advantage: lightweight, and they start together with the browser. Downside: they work only inside browsers, and an extension built for one browser (Chrome, Edge, or Firefox) does not always work in the others.
|
||||
|
||||
**Common examples**: AdBlock Plus, Immersive Translate, 1Password, Grammarly, Tampermonkey, Dark Reader
|
||||
|
||||
### 1.4 Other Platforms
|
||||
|
||||
#### VS Code Extension
|
||||
|
||||
If you are a developer, you likely use VS Code. VS Code extensions are small programs that "add features" to the editor. Advantage: highly targeted developer audience. Downside: only useful for developer users.
|
||||
|
||||
**Common examples**: Prettier, GitLens, GitHub Copilot, ESLint, Live Server, Chinese Language Pack
|
||||
|
||||
#### NFT Smart Contract
|
||||
|
||||
You may have heard about NFTs - those "digital avatars" sold for millions. NFTs are essentially blockchain-based ownership certificates proving that a digital item belongs to you. Smart contracts are programs that run on a blockchain to create and manage NFTs. Advantage: tamper-resistant and tradable. Downside: high technical barrier and a volatile market.
|
||||
|
||||
**Common examples**: BAYC, CryptoPunks, NBA Top Shot, Azuki, Moonbirds
|
||||
|
||||
### 1.5 Are There More Options?
|
||||
|
||||
Beyond the platforms above, there are also "middle paths" and more possibilities:
|
||||
|
||||
#### Cross-platform Frameworks
|
||||
|
||||
::: details Click to view cross-platform framework details
|
||||
|
||||
**React Native / Flutter**: Want both iOS and Android without writing two codebases? These frameworks let you write once and generate apps for both platforms. Many well-known companies have used them, such as Airbnb and Instagram.
|
||||
|
||||
**Tauri**: a "lightweight alternative" to Electron. It also uses web tech to build desktop apps but with smaller installers and faster runtime. Downside: ecosystem is less mature.
|
||||
|
||||
**uni-app**: very popular in China. One codebase can target WeChat Mini Program, iOS app, Android app, and H5 website. Suitable for teams that want "build once, run everywhere."
|
||||
|
||||
**Capacitor / Ionic**: already have a website and want to quickly turn it into an app? These tools can "wrap" your website into an installable app for app stores.
|
||||
|
||||
These frameworks are essentially trade-offs between native and web development: higher development efficiency, but some compromises on performance and experience.
|
||||
:::
|
||||
|
||||
#### China Mini Program Ecosystem
|
||||
|
||||
::: details Click to view mini program options in China
|
||||
|
||||
**Alipay Mini Program**: finance and local service scenarios. If your users pay bills, order food, or use transit in Alipay, then Alipay Mini Program is a fit. Capabilities like Zhima credit and trust identity are unique to Alipay.
|
||||
|
||||
**Douyin Mini Program**: content commerce and livestream sales. If you sell on Douyin, mini programs can be attached under videos for instant conversion.
|
||||
|
||||
**Kuaishou Mini Program**: lower-tier markets and strong community economy. Kuaishou users are highly engaged, suitable for community group buying and local services.
|
||||
|
||||
**Baidu Mini Program**: search traffic entry. If users search "nearby restaurants" on Baidu, your mini program can appear directly in results.
|
||||
:::
|
||||
|
||||
#### HarmonyOS Ecosystem
|
||||
|
||||
**HarmonyOS apps**: can run on Huawei phones, tablets, watches, and smart home devices. They are developed with ArkTS (similar to TypeScript), and one codebase can support multiple devices. If your audience is in the Huawei ecosystem or your product involves linking IoT devices, HarmonyOS is a key option.
|
||||
|
||||
#### More Developer Tools
|
||||
|
||||
::: details Click to view more developer tool options
|
||||
|
||||
**Command Line Tools (CLI)**: developers use the terminal every day. CLI tools can automate repetitive work, generate code templates, and deploy projects. Examples include `create-react-app`, `git`, and `npm`. Suitable for developer productivity and DevOps automation.
|
||||
|
||||
**JetBrains plugins**: besides VS Code, many developers use IntelliJ IDEA, PyCharm, and WebStorm. If your tool targets Java, Python, or frontend developers, JetBrains Marketplace is also worth considering.
|
||||
|
||||
**Cursor / Windsurf plugins**: emerging ecosystems for AI coding tools. If you are building AI-assisted coding features, these IDE plugin ecosystems are growing quickly.
|
||||
:::
|
||||
|
||||
#### Community Bots
|
||||
|
||||
::: details Click to view community bot options
|
||||
|
||||
**Telegram Bot**: large overseas user base and developer-friendly APIs. Suitable for notifications, automation tasks, and community management. Many crypto projects and dev communities use Telegram.
|
||||
|
||||
**Discord Bot**: core platform for gaming and developer communities. Useful for music playback, game data queries, and server management. If your users are gamers or overseas developers, Discord bots are often essential.
|
||||
:::
|
||||
|
||||
#### Design and Productivity Tools
|
||||
|
||||
::: details Click to view design tool options
|
||||
|
||||
**Figma plugins**: designers use Figma every day. Plugins can automate design workflows, generate code, and manage design systems. Suitable for design tooling and frontend assistance.
|
||||
|
||||
**Notion integrations**: with Notion API you can automate workflows, sync data, and generate reports. Suitable for knowledge management and project management tools.
|
||||
:::
|
||||
|
||||
#### Spatial Computing
|
||||
|
||||
**visionOS apps (Apple Vision Pro)**: the new era of spatial computing. Suitable for 3D content display, immersive experiences, education/training, and virtual collaboration. Technical barrier is high, but for frontier exploration this is a future direction.
|
||||
|
||||
---
|
||||
|
||||
## 2 Ask Yourself Three Questions First
|
||||
|
||||
Before choosing a platform, answer these three core questions:
|
||||
|
||||
<el-card shadow="hover" style="margin: 20px 0; border-radius: 12px; border-left: 4px solid #409EFF;">
|
||||
<template #header>
|
||||
<div style="display: flex; align-items: center; gap: 8px;">
|
||||
<span style="font-size: 20px;">🎯</span>
|
||||
<span style="font-weight: bold; font-size: 16px;">Question 1: Where are your users?</span>
|
||||
</div>
|
||||
</template>
|
||||
<div style="line-height: 1.8; color: #606266;">
|
||||
<ul>
|
||||
<li>Do users need to use it anytime, anywhere? (mobile first)</li>
|
||||
<li>Are users used to completing tasks inside WeChat? (mini program)</li>
|
||||
<li>Will users spend long sessions in office scenarios? (desktop app)</li>
|
||||
<li>Do users need to find you via search engines? (website)</li>
|
||||
</ul>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
<el-card shadow="hover" style="margin: 20px 0; border-radius: 12px; border-left: 4px solid #67C23A;">
|
||||
<template #header>
|
||||
<div style="display: flex; align-items: center; gap: 8px;">
|
||||
<span style="font-size: 20px;">⚡</span>
|
||||
<span style="font-weight: bold; font-size: 16px;">Question 2: What capabilities does your app need?</span>
|
||||
</div>
|
||||
</template>
|
||||
<div style="line-height: 1.8; color: #606266;">
|
||||
<ul>
|
||||
<li>Does it need access to camera, microphone, GPS, or other hardware?</li>
|
||||
<li>Does it need offline support?</li>
|
||||
<li>Does it need push notifications?</li>
|
||||
<li>Does it need to process large amounts of local data?</li>
|
||||
</ul>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
<el-card shadow="hover" style="margin: 20px 0; border-radius: 12px; border-left: 4px solid #E6A23C;">
|
||||
<template #header>
|
||||
<div style="display: flex; align-items: center; gap: 8px;">
|
||||
<span style="font-size: 20px;">💰</span>
|
||||
<span style="font-weight: bold; font-size: 16px;">Question 3: How many resources do you have?</span>
|
||||
</div>
|
||||
</template>
|
||||
<div style="line-height: 1.8; color: #606266;">
|
||||
<ul>
|
||||
<li>What is your development time budget?</li>
|
||||
<li>Do you have a Mac device (required for iOS development)?</li>
|
||||
<li>Do you need to cover multiple platforms at once?</li>
|
||||
</ul>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
---
|
||||
|
||||
## 3 Platform Selection Decision Table
|
||||
|
||||
Use this table to quickly identify your fit:
|
||||
|
||||
| Your scenario | Recommended platform | Why |
|
||||
|---------|---------|------|
|
||||
| Users are in WeChat ecosystem and you want fast user growth | <el-tag type="success">WeChat Mini Program</el-tag> | No download needed, easy WeChat sharing, low acquisition cost |
|
||||
| Need continuous GPS tracking in background and health data access | <el-tag type="primary">iOS / Android Native</el-tag> | Direct system API access, best performance |
|
||||
| Want one codebase for multiple platforms | <el-tag type="warning">PWA / Electron</el-tag> | High efficiency, low maintenance cost |
|
||||
| Users need long sessions on computers | <el-tag type="primary">Desktop App</el-tag> (Electron / Qt) | Separate window, offline support, strong system integration |
|
||||
| Need auto summary/translation/password management while browsing | <el-tag type="info">Browser Extension</el-tag> | Can read/modify webpage content, launches with browser |
|
||||
| Want technical articles/project showcase indexed by Google | <el-tag type="warning">Website / Personal Blog</el-tag> | SEO-friendly, searchable content |
|
||||
| Want to issue tradable digital membership cards or collectibles | <el-tag type="danger">NFT Smart Contract</el-tag> | On-chain ownership, transferable/tradable |
|
||||
|
||||
---
|
||||
|
||||
## 4 Practical Scenario Examples
|
||||
|
||||
### Scenario 1: I want to build a community group-buy tool
|
||||
|
||||
**💡 Recommended: WeChat Mini Program**
|
||||
|
||||
Why mini program?
|
||||
|
||||
- **Users are already in WeChat**: community users are active in WeChat groups; mini programs can be shared directly in groups
|
||||
- **Use-and-go behavior**: nobody wants to install a dedicated app just to order vegetables
|
||||
- **Seamless payment**: one-tap WeChat Pay, no context switching
|
||||
- **Low acquisition cost**: one group-sharing flow can bring dozens of users
|
||||
|
||||
::: tip 💡 Applicable scenarios
|
||||
If your product is similar - group buying, booking, surveys, event signup - mini programs are usually the first choice.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Scenario 2: I want to build a running tracker app
|
||||
|
||||
**⚡ Recommended: iOS / Android Native**
|
||||
|
||||
Why native app?
|
||||
|
||||
- **Background execution**: the app must keep tracking your route for the whole run, even when the screen is off, which mini programs and websites cannot reliably do
|
||||
- **GPS precision**: native apps can access high-precision location with small error range
|
||||
- **Health data access**: step count and heart rate access needs Apple HealthKit / Google Fit
|
||||
- **Reliable push reminders**: daily "time to run" reminders are best done via native push
|
||||
|
||||
::: warning ⚠️ Important note
|
||||
Any app that requires **long-term background execution** or **deep hardware access** should choose native development.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Scenario 3: I want to build a bookkeeping app
|
||||
|
||||
**📝 Recommended: PWA or Mini Program**
|
||||
|
||||
Why?
|
||||
|
||||
- **High frequency but short sessions**: one record per day, done in 30 seconds
|
||||
- **No complex hardware needs**: mostly data entry and display
|
||||
- **Strong cross-platform requirement**: users may record on phone and review reports on desktop
|
||||
- **Offline scenario**: users may want to log expenses in subway with no signal
|
||||
|
||||
A PWA can be installed on the home screen and feels like an app, while its development cost is about one-third that of a native app. Mini programs are often the better choice for users in China.
|
||||
|
||||
---
|
||||
|
||||
### Scenario 4: I want to build an online education platform
|
||||
|
||||
**📚 Recommended: Website + Mini Program combination**
|
||||
|
||||
Why?
|
||||
|
||||
- **Website handles acquisition**: course pages, instructor profiles, SEO optimization
|
||||
- **Mini program handles conversion**: trial class, enrollment payment, group join via QR
|
||||
- **Website handles delivery**: video playback is better on larger web screens
|
||||
- **Mini program handles touchpoints**: class reminders and homework notifications
|
||||
|
||||
::: tip 💡 Combination strategy
|
||||
Complex business often needs a **multi-platform combination**, not a single platform.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Scenario 5: I want to build a team collaboration tool
|
||||
|
||||
**🤝 Recommended: Electron desktop app + web version**
|
||||
|
||||
Why?
|
||||
|
||||
- **Desktop side**: users keep computers on at work; desktop apps can stay resident and receive messages
|
||||
- **Web side**: temporary use on other computers without installation
|
||||
- **System integration**: desktop app can access local files, system notifications, and shortcuts
|
||||
- **One codebase**: Electron uses the web stack, so the desktop and web versions can share about 80% of their code
|
||||
|
||||
Slack, Notion, and Discord all follow this pattern.
|
||||
|
||||
---
|
||||
|
||||
### Scenario 6: I want to build a password manager
|
||||
|
||||
**🔐 Recommended: Desktop app + browser extension**
|
||||
|
||||
Why?
|
||||
|
||||
- **Desktop app**: secure local password database storage, supports biometric unlock
|
||||
- **Browser extension**: autofill on login pages without switching windows
|
||||
- **Offline availability**: password data stored locally, independent of network
|
||||
- **Security control**: users know where their data is, reducing cloud leakage concerns
|
||||
|
||||
1Password and Bitwarden both use this combination.
|
||||
|
||||
---
|
||||
|
||||
### Scenario 7: I want to build a content creation platform
|
||||
|
||||
**✍️ Recommended: Website + personal blog**
|
||||
|
||||
Why?
|
||||
|
||||
- **SEO is the lifeline**: search is your largest long-term traffic source
|
||||
- **Content is product**: articles, tutorials, and videos are core value
|
||||
- **Long-term asset**: websites can operate for years, while social accounts can be suspended anytime
|
||||
- **Flexible monetization**: ads, paid subscriptions, and knowledge commerce can all run on websites
|
||||
|
||||
Medium, Zhihu columns, and personal tech blogs are all essentially content platforms.
|
||||
|
||||
---
|
||||
|
||||
### Scenario 8: I want to build a developer productivity tool
|
||||
|
||||
**🛠️ Recommended: VS Code extension or CLI tool**
|
||||
|
||||
Why?
|
||||
|
||||
- **Users are already inside the editor**: developers dislike context switching
|
||||
- **Context awareness**: tools can read current code and provide precise suggestions
|
||||
- **Easy distribution**: publish to extension marketplace and users install with one click
|
||||
- **Fast iteration**: no app store review delays, same-day release/update
|
||||
|
||||
Prettier, ESLint, and GitHub Copilot are all VS Code extensions.
|
||||
|
||||
---
|
||||
|
||||
### Scenario 9: I want to build an industrial monitoring dashboard
|
||||
|
||||
**🏭 Recommended: Qt desktop application**
|
||||
|
||||
Why?
|
||||
|
||||
- **Stability above all**: factories run 24/7 and software cannot crash
|
||||
- **Hardware communication**: needs serial/Modbus communication with sensors
|
||||
- **Real-time charting**: pressure/temperature/flow often need millisecond refresh
|
||||
- **Industrial environment**: industrial computers commonly run Windows, and Qt compatibility is strong
|
||||
|
||||
::: warning ⚠️ Industrial scenarios
|
||||
Industrial scenarios require stability and hardware interfaces that web technologies usually cannot satisfy.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Scenario 10: I want to issue a digital membership card
|
||||
|
||||
**🎫 Recommended: NFT smart contract**
|
||||
|
||||
Why?
|
||||
|
||||
- **Unforgeable**: on-chain records cannot be tampered with
|
||||
- **Transferable**: memberships can be gifted or traded on secondary markets
|
||||
- **Programmable**: smart contracts can automate benefits (for example auto-upgrade after one year)
|
||||
- **Global reach**: no national boundaries, global participation possible
|
||||
|
||||
Well-known examples include Starbucks Odyssey (an NFT-based loyalty program that ran until 2024) and NBA Top Shot (tradable digital collectibles).
|
||||
|
||||
---
|
||||
|
||||
## 5 Quick Platform Capability Comparison
|
||||
|
||||
### 5.1 Mobile Solution Comparison
|
||||
|
||||
| Capability | WeChat Mini Program | iOS Native | Android Native | PWA |
|
||||
|-----|----------|---------|-------------|-----|
|
||||
| User acquisition cost | <el-tag type="success">Low</el-tag> (WeChat sharing) | <el-tag type="danger">High</el-tag> (app store) | <el-tag type="danger">High</el-tag> (app store) | <el-tag type="warning">Medium</el-tag> (search engines) |
|
||||
| Offline usage | <el-tag type="warning">Limited</el-tag> | <el-tag type="success">Full</el-tag> | <el-tag type="success">Full</el-tag> | <el-tag type="success">Supported</el-tag> |
|
||||
| Push notifications | <el-tag type="success">Supported</el-tag> | <el-tag type="success">Supported</el-tag> | <el-tag type="success">Supported</el-tag> | <el-tag type="warning">Partial</el-tag> |
|
||||
| Hardware access | <el-tag type="warning">Restricted</el-tag> | <el-tag type="success">Full access</el-tag> | <el-tag type="success">Full access</el-tag> | <el-tag type="warning">Restricted</el-tag> |
|
||||
| Background running | <el-tag type="warning">Restricted</el-tag> | <el-tag type="success">Supported</el-tag> | <el-tag type="success">Supported</el-tag> | <el-tag type="warning">Restricted</el-tag> |
|
||||
| Development cost | <el-tag type="success">Low</el-tag> | <el-tag type="danger">High</el-tag> | <el-tag type="danger">High</el-tag> | <el-tag type="success">Low</el-tag> |
|
||||
| Review required | <el-tag type="warning">Yes</el-tag> | <el-tag type="warning">Yes</el-tag> | <el-tag type="warning">Yes</el-tag> | <el-tag type="success">No</el-tag> |
|
||||
|
||||
### 5.2 Desktop Solution Comparison
|
||||
|
||||
| Capability | Electron | Qt | Browser Extension |
|
||||
|-----|----------|-----|-----------|
|
||||
| Cross-platform | Win/Mac/Linux | Win/Mac/Linux | Chrome/Edge/Firefox |
|
||||
| System integration | <el-tag type="warning">Medium</el-tag> | <el-tag type="success">High</el-tag> | <el-tag type="warning">Low</el-tag> |
|
||||
| Offline usage | <el-tag type="success">Supported</el-tag> | <el-tag type="success">Supported</el-tag> | <el-tag type="warning">Partial</el-tag> |
|
||||
| Hardware access | <el-tag type="warning">Via Node.js</el-tag> | <el-tag type="success">Full access</el-tag> | <el-tag type="warning">Restricted</el-tag> |
|
||||
| Installation | Installer package | Installer package | Browser extension store |
|
||||
| Development stack | Web technologies | C++/QML | JavaScript |
|
||||
|
||||
---
|
||||
|
||||
## 6 Common Misconceptions
|
||||
|
||||
<el-collapse accordion style="margin: 20px 0;">
|
||||
<el-collapse-item name="1">
|
||||
<template #title>
|
||||
<span style="font-weight: bold; color: #F56C6C;">❌ Misconception 1: "I want to build an app, so I must build both iOS and Android"</span>
|
||||
</template>
|
||||
<div style="padding: 10px; color: #606266; line-height: 1.8;">
|
||||
Not necessarily. If your app is lightweight and use-and-go, a mini program or PWA may be a better choice. Native development is worth it only when you need deep system access or top-end performance.
|
||||
</div>
|
||||
</el-collapse-item>
|
||||
|
||||
<el-collapse-item name="2">
|
||||
<template #title>
|
||||
<span style="font-weight: bold; color: #F56C6C;">❌ Misconception 2: "Websites are outdated and nobody reads them anymore"</span>
|
||||
</template>
|
||||
<div style="padding: 10px; color: #606266; line-height: 1.8;">
|
||||
The opposite is true. Of all the platforms discussed here, websites are the ones search engines can fully index. If you want content-driven user growth, websites and personal blogs are top choices. Technical articles and project showcases can continuously bring SEO traffic.
|
||||
</div>
|
||||
</el-collapse-item>
|
||||
|
||||
<el-collapse-item name="3">
|
||||
<template #title>
|
||||
<span style="font-weight: bold; color: #F56C6C;">❌ Misconception 3: "Desktop apps are no longer used"</span>
|
||||
</template>
|
||||
<div style="padding: 10px; color: #606266; line-height: 1.8;">
|
||||
In office scenarios, desktop apps are still mainstream. VS Code, Slack, and Notion are all desktop apps. If your app needs long-session usage, heavy data handling, or system integration, desktop is often the best choice.
|
||||
</div>
|
||||
</el-collapse-item>
|
||||
|
||||
<el-collapse-item name="4">
|
||||
<template #title>
|
||||
<span style="font-weight: bold; color: #F56C6C;">❌ Misconception 4: "PWA experience is worse than native"</span>
|
||||
</template>
|
||||
<div style="padding: 10px; color: #606266; line-height: 1.8;">
|
||||
Modern PWAs are already very close to native experience. Starbucks, Pinterest, and Uber all have PWA versions. If your app does not require complex hardware integration, PWA is often the most cost-effective cross-platform solution.
|
||||
</div>
|
||||
</el-collapse-item>
|
||||
</el-collapse>
|
||||
|
||||
---
|
||||
|
||||
## 7 Summary: Platform Selection Decision Flow
|
||||
|
||||
```text
|
||||
Start
|
||||
│
|
||||
├─ Are users in WeChat ecosystem? ───────────────────→ WeChat Mini Program
|
||||
│
|
||||
├─ Need best performance and deep hardware access? ──→ iOS / Android Native
|
||||
│
|
||||
├─ Need long usage sessions on computers? ───────────→ Desktop App
|
||||
│ │
|
||||
│ ├─ Industrial scenario? ───────────────────────→ Qt
|
||||
│ └─ General scenario? ──────────────────────────→ Electron
|
||||
│
|
||||
├─ Need to process browser page content? ────────────→ Browser Extension
|
||||
│
|
||||
├─ Lightweight + cross-platform + offline? ──────────→ PWA
|
||||
│
|
||||
├─ Need to be discoverable by search? ───────────────→ Website / Blog
|
||||
│
|
||||
├─ Developer tool? ───────────────────────────────────→ VS Code Extension
|
||||
│
|
||||
└─ Blockchain asset? ────────────────────────────────→ NFT Smart Contract
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8 Next Step
|
||||
|
||||
::: tip 🎯 Start Taking Action
|
||||
Based on the analysis above, you should now have a preliminary answer to "which platform to choose." Next, click the matching tutorial to start:
|
||||
:::
|
||||
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.3-wechat-miniprogram/"
|
||||
title="How to Build a WeChat Mini Program"
|
||||
description="Build a WeChat Mini Program from scratch and master the core development workflow"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.5-android-app/"
|
||||
title="How to Build an Android App"
|
||||
description="Build Android-native applications with modern cross-platform frameworks"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.6-ios-app/"
|
||||
title="How to Build an iOS App"
|
||||
description="Develop and publish iOS applications with Apple ecosystem best practices"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.8-pwa-local-app/"
|
||||
title="How to Build a Local PWA App"
|
||||
description="Turn a website into a real app with offline support and desktop installation"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.9-browser-ai-extension/"
|
||||
title="How to Build a Browser AI Assistant Extension"
|
||||
description="Summarize any webpage in one click and build your browser AI assistant"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.10-electron-voice-to-text/"
|
||||
title="How to Build a Cross-Platform Electron Desktop App"
|
||||
description="Build a speech-to-text desktop app for Windows, macOS, and Linux"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.12-vscode-extension/"
|
||||
title="How to Build a VS Code Extension"
|
||||
description="Create your AI project assistant with multi-file Q&A and custom shortcuts"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.13-qt-industrial-hmi/"
|
||||
title="How to Build a Qt Industrial HMI"
|
||||
description="Build an industrial-grade human-machine interface that connects to real hardware"
|
||||
/>
|
||||
</NavGrid>
|
||||
@@ -0,0 +1,500 @@
|
||||
# How to Build a Cross-Platform Electron Desktop App: A Speech-to-Text Application
|
||||
|
||||
# Chapter 1: What Electron and Desktop App Development Are
|
||||
|
||||
In this tutorial, we will complete a full closed loop: build a speech-to-text desktop app from scratch with Electron, support both cloud API and local model recognition modes, and finally package it into a real desktop application that can be installed and run on Windows, macOS, and Linux.
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
- A computer (Windows or Mac; a Mac is recommended because local models run very fast on Apple Silicon)
|
||||
- A Node.js environment (version 18.0 or above)
|
||||
- Your AI coding assistant (Cursor / Trae / Claude Code)
|
||||
- (Optional) An OpenAI API Key (if you use cloud mode)
|
||||
- A microphone (the built-in laptop microphone is fine)
|
||||
|
||||
## 1.1 What Is Electron?
|
||||
|
||||
Apps you use every day, such as **VS Code, Slack, Discord, and Notion**, have one thing in common: they are all desktop applications built with **Electron**.
|
||||
|
||||
Electron is an open-source framework that lets you use **HTML + CSS + JavaScript** (the same stack used for web pages) to build desktop apps that run across **Windows, macOS, and Linux**. Its principle is simple: package Chromium and Node.js together, and your web page becomes a standalone desktop app.
|
||||
|
||||
**One-sentence understanding**: Electron = an "invisible Chrome browser" + Node.js system capabilities.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.2 Core Electron Architecture
|
||||
|
||||
An Electron app consists of two process types. Understanding them is the key to development:
|
||||
|
||||
**Main Process**
|
||||
|
||||
* The "general manager" of the app
|
||||
* Responsible for creating windows, managing app lifecycle, and accessing native capabilities such as the file system
|
||||
* Runs in the Node.js environment and can use all Node.js modules
|
||||
* There is only one main process per app
|
||||
|
||||
**Renderer Process**
|
||||
|
||||
* The "front face" of the app
|
||||
* Essentially a Chromium web page responsible for UI rendering
|
||||
* Each window corresponds to one renderer process
|
||||
* For security reasons, the renderer process cannot directly access Node.js APIs
|
||||
|
||||
**Preload Script**
|
||||
|
||||
* The "bridge" between the main process and renderer process
|
||||
* Uses `contextBridge` to safely expose selected APIs to the renderer process
|
||||
|
||||
They communicate through **IPC (Inter-Process Communication)**, like making a phone call: the renderer says "I want to start recording," and the main process receives that request and calls the system microphone.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.3 What Are We Building?
|
||||
|
||||
In this tutorial, we will build a **Speech-to-Text** desktop app. Its functionality is straightforward:
|
||||
|
||||
1. Click the "Start Recording" button, and the app starts listening to the microphone
|
||||
2. After speaking, click "Stop," and the app sends audio to AI for recognition
|
||||
3. The recognized text is displayed in the UI and can be copied with one click
|
||||
|
||||
**Two recognition modes are available:**
|
||||
|
||||
| Comparison Dimension | Cloud API Mode | Local Model Mode |
|
||||
|---------|-------------|------------|
|
||||
| Representative Solution | OpenAI Whisper API | whisper.cpp |
|
||||
| Internet Required | Yes | No |
|
||||
| Recognition Speed | Depends on network | Depends on hardware (very fast on Apple Silicon) |
|
||||
| Chinese Recognition Quality | Excellent | Excellent (large-v3 model) |
|
||||
| Cost | $0.006/minute | Free |
|
||||
| Model Size | No download required | tiny model 75MB, large model 3GB |
|
||||
| Best For | Fast onboarding, lightweight usage | Privacy-focused, offline usage, long-term high-frequency usage |
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.4 Important Note: Web Speech API Is Not Available in Electron
|
||||
|
||||
If you have searched for "Electron speech recognition," you may have seen recommendations to use the browser's built-in `Web Speech API`. **Please note: this does not work in Electron.**
|
||||
|
||||
Google has discontinued speech API support for non-Chrome/Edge browser shells. Electron is Chromium-based, but it is not Chrome itself, so `window.SpeechRecognition` will fail directly.
|
||||
|
||||
That is why we need independent solutions such as OpenAI Whisper API or whisper.cpp.
|
||||
|
||||
## 1.5 Tutorial Roadmap
|
||||
|
||||
We will complete the full flow in the following steps:
|
||||
|
||||
1. **Create an Electron project**: Use Electron Forge to scaffold the project and understand inter-process communication
|
||||
2. **Implement recording**: Capture microphone input in the renderer process and process audio data
|
||||
3. **Cloud recognition (Option A)**: Use OpenAI Whisper API for speech-to-text
|
||||
4. **Local recognition (Option B)**: Use whisper.cpp locally without internet access
|
||||
5. **Packaging and distribution**: Package the app into an installable desktop program
|
||||
|
||||
# Chapter 2: Create the Electron Project
|
||||
|
||||
## 2.1 Initialize the Project with AI
|
||||
|
||||
Open your AI coding assistant and enter this prompt:
|
||||
|
||||
```
|
||||
Please help me create a new Electron project with Electron Forge using the Vite template.
|
||||
The project name is voice-to-text.
|
||||
Please run: npx create-electron-app voice-to-text --template=vite
|
||||
After creation, enter the project directory and install dependencies.
|
||||
```
|
||||
|
||||
Electron Forge is the scaffolding tool officially recommended by Electron. It helps with project initialization, packaging, distribution, and other tedious setup tasks.
|
||||
|
||||
After creation, the project structure is roughly:
|
||||
|
||||
```text
|
||||
voice-to-text/
|
||||
├── src/
|
||||
│ ├── main.js # Main process entry
|
||||
│ ├── preload.js # Preload script (bridge)
|
||||
│ ├── renderer.js # Renderer process entry
|
||||
│ └── index.html # App HTML page
|
||||
├── forge.config.js # Electron Forge config
|
||||
├── vite.main.config.mjs # Main process Vite config
|
||||
├── vite.preload.config.mjs # Preload script Vite config
|
||||
├── vite.renderer.config.mjs # Renderer process Vite config
|
||||
└── package.json
|
||||
```
|
||||
|
||||
## 2.2 Start and Preview
|
||||
|
||||
Ask AI to start the development server:
|
||||
|
||||
```
|
||||
Please help me start the Electron development server by running npm start
|
||||
```
|
||||
|
||||
After a few seconds, a desktop window appears. This is your Electron app. Even though it only shows a default welcome page now, it is already a real desktop program.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 2.3 Understand IPC (Inter-Process Communication)
|
||||
|
||||
Before implementing speech features, we need to understand Electron's most important concept: **IPC (Inter-Process Communication)**.
|
||||
|
||||
Because the renderer process (UI) and main process (system capabilities) are isolated, they must use IPC "phone calls" to collaborate:
|
||||
|
||||
```text
|
||||
Renderer process (UI) Main process (system)
|
||||
│ │
|
||||
│── "I want to start recording" ──────────→ │
|
||||
│ │── Call microphone
|
||||
│ │── Process audio
|
||||
│ ←──── "Here is the result" ─────────────│
|
||||
│ │
|
||||
│── Display text in UI │
|
||||
```
|
||||
|
||||
In code, this communication is bridged via `preload.js`:
|
||||
|
||||
```javascript
|
||||
// preload.js - safely expose APIs to renderer process
|
||||
const { contextBridge, ipcRenderer } = require('electron')
|
||||
|
||||
contextBridge.exposeInMainWorld('electronAPI', {
|
||||
// Renderer -> Main
|
||||
sendAudio: (audioData) => ipcRenderer.invoke('transcribe-audio', audioData),
|
||||
// Main -> Renderer
|
||||
onResult: (callback) => ipcRenderer.on('transcription-result', callback)
|
||||
})
|
||||
```
|
||||
|
||||
```javascript
|
||||
// main.js - main process listens for messages
|
||||
const { ipcMain } = require('electron')
|
||||
|
||||
ipcMain.handle('transcribe-audio', async (event, audioData) => {
|
||||
// Call Whisper API or whisper.cpp here
|
||||
const text = await transcribe(audioData)
|
||||
return text
|
||||
})
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 3: Implement Recording
|
||||
|
||||
## 3.1 Capture Microphone Input in the Renderer Process
|
||||
|
||||
The Electron renderer process is essentially a Chromium browser page, so it provides `navigator.mediaDevices.getUserMedia` for accessing the microphone. Ask AI to help implement recording:
|
||||
|
||||
```
|
||||
Please help me modify src/index.html and src/renderer.js to implement:
|
||||
|
||||
UI:
|
||||
1. A large circular "Start Recording" button, which turns into a red "Stop Recording" button when clicked
|
||||
2. Show a simple pulse animation while recording
|
||||
3. A text display area below for recognition results
|
||||
4. Two buttons at the bottom: "Copy Text" and "Clear"
|
||||
5. A settings icon at top-right to switch recognition mode (cloud/local)
|
||||
|
||||
Recording logic (in renderer.js):
|
||||
1. On button click, request microphone access via navigator.mediaDevices.getUserMedia
|
||||
2. Use MediaRecorder to record audio in webm format
|
||||
3. After stopping, convert audio Blob to ArrayBuffer
|
||||
4. Send it to main process via window.electronAPI.sendAudio
|
||||
5. Wait for recognition result from main process and display it
|
||||
```
|
||||
|
||||
Core recording code:
|
||||
|
||||
```javascript
|
||||
// renderer.js
|
||||
let mediaRecorder = null
|
||||
let audioChunks = []
|
||||
|
||||
async function startRecording() {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: 1,
|
||||
sampleRate: 16000,
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true
|
||||
}
|
||||
})
|
||||
|
||||
mediaRecorder = new MediaRecorder(stream, {
|
||||
mimeType: 'audio/webm;codecs=opus'
|
||||
})
|
||||
|
||||
audioChunks = []
|
||||
mediaRecorder.ondataavailable = (e) => audioChunks.push(e.data)
|
||||
|
||||
mediaRecorder.onstop = async () => {
|
||||
const audioBlob = new Blob(audioChunks, { type: 'audio/webm' })
|
||||
const arrayBuffer = await audioBlob.arrayBuffer()
|
||||
|
||||
// Send to main process for transcription
|
||||
const result = await window.electronAPI.sendAudio(arrayBuffer)
|
||||
document.getElementById('result').textContent = result
|
||||
}
|
||||
|
||||
mediaRecorder.start()
|
||||
}
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 3.2 Handle Microphone Permissions
|
||||
|
||||
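By default, Electron automatically approves every permission request made by a page. To keep the app secure, we register an explicit handler in the main process that allows only microphone access and denies everything else: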
|
||||
|
||||
```
|
||||
Please help me add microphone permission handling in main.js:
|
||||
1. Use session.defaultSession.setPermissionRequestHandler to handle permission requests
|
||||
2. Auto-allow when request type is 'media'
|
||||
3. For macOS, ensure microphone usage description is declared in package.json or entitlements
|
||||
```
|
||||
|
||||
```javascript
|
||||
// Add to main.js
|
||||
const { session } = require('electron')
|
||||
|
||||
session.defaultSession.setPermissionRequestHandler(
|
||||
(webContents, permission, callback) => {
|
||||
if (permission === 'media') {
|
||||
callback(true)
|
||||
} else {
|
||||
callback(false)
|
||||
}
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
> **Note for macOS users**: macOS will show a system-level microphone permission dialog. This is normal. Click "Allow."
|
||||
|
||||
# Chapter 4: Option A - Cloud Recognition (OpenAI Whisper API)
|
||||
|
||||
This is the simplest option. You only need an API key and a few lines of code.
|
||||
|
||||
## 4.1 Get an OpenAI API Key
|
||||
|
||||
1. Visit [OpenAI Platform](https://platform.openai.com/), sign up, and log in
|
||||
2. Go to the API Keys page and click **"Create new secret key"**
|
||||
3. Copy the generated key (starts with `sk-`) and store it safely
|
||||
|
||||
> **Cost reference**: Whisper API costs **$0.006/minute**. That means recognizing 1 hour of audio only costs $0.36, which is very affordable.
|
||||
|
||||
## 4.2 Call Whisper API in the Main Process
|
||||
|
||||
Ask AI to implement speech recognition in the main process:
|
||||
|
||||
```
|
||||
Please help me implement OpenAI Whisper API in main.js:
|
||||
1. Install node-fetch (if needed) or use built-in fetch in Node.js
|
||||
2. Create transcribeWithWhisper function that accepts audio ArrayBuffer
|
||||
3. Convert ArrayBuffer to Blob/File and build FormData
|
||||
4. Call https://api.openai.com/v1/audio/transcriptions
|
||||
5. Use model whisper-1 and set language to zh (Chinese)
|
||||
6. Return the recognized text
|
||||
7. Read API key from environment variables or config file
|
||||
```
|
||||
|
||||
Core code:
|
||||
|
||||
```javascript
|
||||
// main.js
|
||||
async function transcribeWithWhisper(audioBuffer, apiKey) {
|
||||
const blob = new Blob([audioBuffer], { type: 'audio/webm' })
|
||||
const formData = new FormData()
|
||||
formData.append('file', blob, 'audio.webm')
|
||||
formData.append('model', 'whisper-1')
|
||||
formData.append('language', 'zh')
|
||||
|
||||
const response = await fetch(
|
||||
'https://api.openai.com/v1/audio/transcriptions',
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { Authorization: `Bearer ${apiKey}` },
|
||||
body: formData
|
||||
}
|
||||
)
|
||||
|
||||
const data = await response.json()
|
||||
return data.text
|
||||
}
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 4.3 Add a Settings UI
|
||||
|
||||
Ask AI to add a simple settings panel in the renderer process to input API key and switch recognition mode:
|
||||
|
||||
```
|
||||
Please help me add a settings panel in index.html:
|
||||
1. Add a gear icon in the top-right corner; click to expand settings panel
|
||||
2. The panel includes:
|
||||
- Recognition mode switch (Cloud API / Local model)
|
||||
- API Key input (only visible in cloud mode)
|
||||
- Language dropdown (Chinese / English / Auto detect)
|
||||
3. Save settings to localStorage
|
||||
4. Close panel when clicking outside
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 5: Option B - Local Recognition (whisper.cpp)
|
||||
|
||||
If you do not want to rely on cloud APIs, or if you need offline usage, whisper.cpp is the best choice. It is a C++ port of the OpenAI Whisper model and runs fully locally without internet.
|
||||
|
||||
## 5.1 Install whisper.cpp Node.js Bindings
|
||||
|
||||
Ask AI to install and configure:
|
||||
|
||||
```
|
||||
Please help me install nodejs-whisper in the project:
|
||||
npm install nodejs-whisper
|
||||
|
||||
After installation, please help me download the whisper tiny model (small size, fast for testing).
|
||||
nodejs-whisper will handle model download automatically.
|
||||
```
|
||||
|
||||
> **Model selection guide**:
|
||||
> * `tiny` (75MB): fastest, good for testing and lightweight usage, average accuracy
|
||||
> * `base` (142MB): balance between speed and accuracy
|
||||
> * `small` (466MB): clearly better Chinese recognition quality
|
||||
> * `large-v3-turbo` (1.5GB): recommended; 5-8x faster than large, with only 1-2% lower accuracy
|
||||
> * `large-v3` (3GB): highest accuracy, but slower and needs better hardware
|
||||
|
||||
## 5.2 Integrate whisper.cpp in Main Process
|
||||
|
||||
Ask AI to implement local recognition:
|
||||
|
||||
```
|
||||
Please help me add whisper.cpp local recognition in main.js:
|
||||
1. Import nodejs-whisper
|
||||
2. Create transcribeWithLocal function
|
||||
3. Accept audio ArrayBuffer and save it as a temporary WAV file first (16kHz mono)
|
||||
4. Call nodejs-whisper for recognition
|
||||
5. Return recognized text
|
||||
6. Delete temporary file after recognition
|
||||
```
|
||||
|
||||
Core code:
|
||||
|
||||
```javascript
|
||||
// main.js
|
||||
const { nodewhisper } = require('nodejs-whisper')
|
||||
const path = require('path')
|
||||
const fs = require('fs')
|
||||
const os = require('os')
|
||||
|
||||
async function transcribeWithLocal(audioBuffer) {
|
||||
// Save as temp file
|
||||
const tempPath = path.join(os.tmpdir(), `recording-${Date.now()}.wav`)
|
||||
fs.writeFileSync(tempPath, Buffer.from(audioBuffer))
|
||||
|
||||
try {
|
||||
const result = await nodewhisper(tempPath, {
|
||||
modelName: 'base',
|
||||
autoDownloadModelName: 'base',
|
||||
whisperOptions: {
|
||||
language: 'zh',
|
||||
word_timestamps: true
|
||||
}
|
||||
})
|
||||
return result.map(r => r.speech).join('')
|
||||
} finally {
|
||||
// Clean up temp file
|
||||
fs.unlinkSync(tempPath)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 5.3 Good News for Apple Silicon Users
|
||||
|
||||
If you are using an M1/M2/M3/M4 Mac, whisper.cpp can automatically use **Metal GPU acceleration** and **Apple Neural Engine**. Recognition can run **faster than real-time**, which means 1 minute of audio may only take a few seconds to process.
|
||||
|
||||
For NVIDIA GPU users, whisper.cpp also supports **CUDA acceleration**, which provides strong performance too.
|
||||
|
||||
# Chapter 6: Packaging and Distribution
|
||||
|
||||
After development is complete, we need to package the app into distributable installers.
|
||||
|
||||
## 6.1 Package with Electron Forge
|
||||
|
||||
Electron Forge is already included in our project, so packaging is simple:
|
||||
|
||||
```
|
||||
Please help me run the Electron Forge packaging command:
|
||||
npx electron-forge make
|
||||
```
|
||||
|
||||
This command automatically generates installers for your current operating system:
|
||||
|
||||
* **macOS**: `.dmg` installer image and `.zip` archive
|
||||
* **Windows**: `.exe` installer (Squirrel format)
|
||||
* **Linux**: `.deb` (Debian/Ubuntu) and `.rpm` (Fedora) packages
|
||||
|
||||
Build outputs are in the `out/make/` directory.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 6.2 App Size Optimization
|
||||
|
||||
One "pain point" of Electron apps is large package size (because Chromium is bundled). Optimization suggestions:
|
||||
|
||||
* Ensure only packages in `dependencies` are bundled, and keep dev dependencies in `devDependencies`
|
||||
* Use Vite tree-shaking to reduce JavaScript size
|
||||
* If using local models, consider downloading models on first launch instead of bundling them into the installer
|
||||
|
||||
| Configuration | Estimated Size |
|
||||
|------|---------|
|
||||
| Pure Electron app (no model) | ~150-200 MB |
|
||||
| + whisper tiny model | ~250 MB |
|
||||
| + whisper large-v3-turbo model | ~1.7 GB |
|
||||
|
||||
## 6.3 Cross-Platform Notes
|
||||
|
||||
**macOS:**
|
||||
* Publishing to App Store or distributing to others requires **code signing** (Apple Developer ID, $99/year)
|
||||
* Also requires Apple's **Notarization** process
|
||||
* Microphone permissions must declare `NSMicrophoneUsageDescription` in `Info.plist`
|
||||
* Recommend building a Universal Binary to support both Intel and Apple Silicon
|
||||
|
||||
**Windows:**
|
||||
* Code signing is recommended, otherwise Windows SmartScreen will show security warnings
|
||||
* Users can still choose "Run anyway" for unsigned apps
|
||||
|
||||
**Linux:**
|
||||
* No code signing required
|
||||
* Recommended to provide both `.deb` and `.AppImage` formats
|
||||
|
||||
> **Tip**: For personal projects or small-scale distribution, you can temporarily skip code signing and directly share packaged files with friends.
|
||||
|
||||
# Chapter 7: Final Notes
|
||||
|
||||
Congratulations! You have built a cross-platform speech-to-text desktop app from scratch. Let's recap what we did:
|
||||
|
||||
1. Used Electron Forge to scaffold a cross-platform desktop app
|
||||
2. Understood main process, renderer process, and IPC communication
|
||||
3. Implemented microphone recording and audio capture
|
||||
4. Integrated two speech recognition options: cloud Whisper API and local whisper.cpp
|
||||
5. Learned how to package and distribute an Electron app
|
||||
|
||||
What makes Electron powerful is that you can build desktop apps at the level of VS Code or Slack using a web-tech stack. And with mature AI speech recognition, a feature like speech-to-text, once requiring a specialized team, can now be built by one person.
|
||||
|
||||
**Advanced directions:**
|
||||
|
||||
* **Real-time subtitles**: Use AudioWorklet for streaming audio and pair with streaming recognition APIs for live transcription
|
||||
* **Meeting assistant**: Record full meetings, auto-generate timestamped transcripts, and summarize key points with AI
|
||||
* **Multilingual translation**: Transcribe speech and call translation APIs for real-time language conversion
|
||||
* **Voice notebook**: Combine with a local database (such as SQLite) to build searchable voice notes
|
||||
|
||||
***Speak freely, and let code record everything for you.***
|
||||
|
||||
# References
|
||||
|
||||
* [Electron Official Docs](https://www.electronjs.org/docs/latest/)
|
||||
* [Electron Forge Official Docs](https://www.electronforge.io/)
|
||||
* [OpenAI Whisper API Docs](https://platform.openai.com/docs/guides/speech-to-text)
|
||||
* [whisper.cpp GitHub Repository](https://github.com/ggml-org/whisper.cpp)
|
||||
* [nodejs-whisper npm Package](https://www.npmjs.com/package/nodejs-whisper)
|
||||
* [MDN MediaDevices.getUserMedia()](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia)
|
||||
@@ -0,0 +1,361 @@
|
||||
# How to Quickly Build and Mint an NFT: 10-Minute Starter Edition
|
||||
|
||||
# Chapter 1: What NFTs and Smart Contracts Are
|
||||
|
||||
In this tutorial, we will complete a full closed loop: write an NFT smart contract from scratch, deploy it to the Ethereum testnet, mint your own NFT, and view it on OpenSea. The whole process uses browser-based tools with no local environment setup required, and can be finished in 10 minutes.
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
- Chrome browser (with MetaMask wallet extension installed)
|
||||
- A MetaMask wallet account
|
||||
- A small amount of Sepolia testnet ETH (free to claim, shown below)
|
||||
|
||||
> **Zero cost, zero setup**: the entire process uses browser-based tools (Remix IDE), no Node.js / Hardhat installation needed; code uses OpenZeppelin official secure templates; after minting, you can view your NFT on OpenSea testnet.
|
||||
|
||||
## 1.1 What Is an NFT?
|
||||
|
||||
An NFT (Non-Fungible Token) is a type of digital asset on a blockchain. Unlike fungible tokens such as Bitcoin or Ether, every NFT is unique, just as no two paintings in the world are exactly the same.
|
||||
|
||||
You can understand an NFT as a **"certificate of collection in the digital world."** It can represent:
|
||||
|
||||
* ownership of a digital artwork
|
||||
* an event ticket
|
||||
* a game item
|
||||
* a learning certificate
|
||||
* even a tweet
|
||||
|
||||
The core value of NFTs is: **they use blockchain technology to prove "this digital item belongs to you," and that proof is public, transparent, and tamper-resistant.**
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.2 What Is a Smart Contract?
|
||||
|
||||
A smart contract is a piece of code that runs on blockchain. You can think of it as an **"automatically executed contract"**. Once deployed on-chain, it runs automatically according to code logic, and no one can tamper with it.
|
||||
|
||||
NFTs are created and managed through smart contracts. When you "mint" an NFT, you are actually calling a function in the smart contract to write on-chain: "NFT #0 belongs to your wallet address."
|
||||
|
||||
We will use **Solidity** to write the contract. Do not worry. With ready-made templates from OpenZeppelin, you only need to write fewer than 15 lines of code.
|
||||
|
||||
## 1.3 What NFT Are We Minting?
|
||||
|
||||
We will mint a **"Vibe Coder Learning Certificate"** NFT to prove you completed this tutorial and learned blockchain development basics. This NFT will:
|
||||
|
||||
* have a unique token ID
|
||||
* be recorded on Ethereum Sepolia testnet
|
||||
* be viewable and displayable on OpenSea testnet
|
||||
* (optional) include your custom image
|
||||
|
||||
Of course, you can change it to any theme you like: AI-generated artwork, event souvenir card, pixel avatar, and more. The NFT content is fully up to you.
|
||||
|
||||
## 1.4 Why Use a Testnet?
|
||||
|
||||
Ethereum has "mainnet" and "testnet":
|
||||
|
||||
| Comparison | Mainnet | Testnet (Sepolia) |
|
||||
|------|----------------|------------------|
|
||||
| ETH value | Real money | Free to claim, no real value |
|
||||
| Deployment cost | Requires real gas fees | Completely free |
|
||||
| Use case | Production release | Learning, testing, development |
|
||||
| Functionality | Full | Same as mainnet |
|
||||
|
||||
Testnet and mainnet are functionally the same. The only difference is that testnet ETH has no real value. So you can safely learn and experiment on testnet without worrying about spending money.
|
||||
|
||||
## 1.5 Tutorial Roadmap
|
||||
|
||||
We will complete the flow in these steps:
|
||||
|
||||
1. **Prepare wallet and test ETH** (2 minutes): install MetaMask and claim free test ETH
|
||||
2. **Write and deploy contract** (4 minutes): write NFT contract in Remix IDE and deploy to Sepolia
|
||||
3. **Mint NFT and check result** (4 minutes): call contract to mint NFT and verify on OpenSea and Etherscan
|
||||
4. **Advanced: add image to NFT** (optional): store image on IPFS to make NFT complete
|
||||
|
||||
# Chapter 2: Prepare Wallet and Test ETH (2 Minutes)
|
||||
|
||||
## 2.1 Install MetaMask Wallet
|
||||
|
||||
MetaMask is the most popular Ethereum wallet. It is a browser extension that lets you interact with blockchain apps.
|
||||
|
||||
1. Open Chrome and visit [MetaMask official site](https://metamask.io/)
|
||||
2. Click **"Download"** and install the Chrome extension
|
||||
3. After installation, click the MetaMask fox icon in the top-right corner
|
||||
4. Choose **"Create a new wallet"** and set a password
|
||||
5. **Important**: keep your recovery phrase (12 words) safe. Losing a test wallet is fine, but good habits matter
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 2.2 Switch to Sepolia Testnet
|
||||
|
||||
MetaMask connects to Ethereum mainnet by default. We need to switch to Sepolia testnet:
|
||||
|
||||
1. Click the network dropdown at the top of MetaMask (default: "Ethereum Mainnet")
|
||||
2. Click **"Show test networks"**
|
||||
3. Select **"Sepolia test network"**
|
||||
|
||||
If you do not see Sepolia, click **"Add network"** and add manually:
|
||||
|
||||
| Config Item | Value |
|
||||
|-------|-----|
|
||||
| Network Name | Sepolia test network |
|
||||
| RPC URL | `https://rpc.sepolia.org` |
|
||||
| Chain ID | 11155111 |
|
||||
| Currency Symbol | SepoliaETH |
|
||||
| Block Explorer | `https://sepolia.etherscan.io` |
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 2.3 Claim Free Test ETH
|
||||
|
||||
Deploying contracts and minting NFTs requires gas fees. On testnet, gas is paid with test ETH, which is free.
|
||||
|
||||
Visit any faucet below and input your wallet address to claim free Sepolia ETH:
|
||||
|
||||
| Faucet | URL | Per-claim Amount | Login Required |
|
||||
|--------|------|-----------|------------|
|
||||
| QuickNode | `https://faucet.quicknode.com/ethereum/sepolia` | 0.1 ETH | Yes |
|
||||
| Alchemy | `https://www.alchemy.com/faucets/ethereum-sepolia` | 0.1 ETH | Yes |
|
||||
| Google Cloud | `https://cloud.google.com/application/web3/faucet/ethereum/sepolia` | 0.05 ETH | Yes (Google account) |
|
||||
|
||||
> **Tip**: 0.1 test ETH is enough for deploying a contract and minting dozens of NFTs. If one faucet fails, try another.
|
||||
|
||||
After claiming successfully, return to MetaMask and your balance should change from 0 to 0.1 ETH (it may take a few seconds).
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 3: Write and Deploy NFT Smart Contract (4 Minutes)
|
||||
|
||||
## 3.1 Open Remix IDE
|
||||
|
||||
Remix is the online smart contract development environment officially recommended by Ethereum. It runs fully in the browser and requires no installation.
|
||||
|
||||
Open: **https://remix.ethereum.org/**
|
||||
|
||||
You will see a VS Code-like interface: file explorer on the left, code editor in the middle, and compile/deploy panel on the right.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 3.2 Create Contract File
|
||||
|
||||
1. In the left file explorer, click the **"contracts"** folder
|
||||
2. Click the **"+"** button above to create a new file
|
||||
3. Name it **`MySimpleNFT.sol`**
|
||||
4. Paste the code below:
|
||||
|
||||
```solidity
|
||||
// SPDX-License-Identifier: MIT
|
||||
pragma solidity ^0.8.20;
|
||||
|
||||
// Import OpenZeppelin official secure ERC721 template
|
||||
import "@openzeppelin/contracts/token/ERC721/ERC721.sol";
|
||||
|
||||
// Simplest NFT contract: name, symbol, mint function only
|
||||
contract MySimpleNFT is ERC721 {
|
||||
uint256 private _tokenId;
|
||||
|
||||
// Initialize collection name and symbol
|
||||
constructor() ERC721("VibeCoder", "VIBE") {}
|
||||
|
||||
// Mint NFT: call once to mint one token to caller
|
||||
function mint() public {
|
||||
_safeMint(msg.sender, _tokenId);
|
||||
_tokenId++;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Code walkthrough (fewer than 15 lines, and each line is understandable):**
|
||||
|
||||
| Code | Meaning |
|
||||
|------|------|
|
||||
| `pragma solidity ^0.8.20` | Specify Solidity compiler version |
|
||||
| `import "@openzeppelin/..."` | Import OpenZeppelin ERC721 standard implementation (security-audited template) |
|
||||
| `contract MySimpleNFT is ERC721` | Create a contract inheriting ERC721 standard |
|
||||
| `ERC721("VibeCoder", "VIBE")` | Set collection name "VibeCoder" and symbol "VIBE" |
|
||||
| `_safeMint(msg.sender, _tokenId)` | Mint a new NFT to caller |
|
||||
| `_tokenId++` | Increment token ID after each mint |
|
||||
|
||||
> **What is ERC721?** It is the NFT standard on Ethereum, defining basic NFT capabilities (transfer, owner query, etc.). OpenZeppelin provides a security-audited implementation, so we can inherit directly instead of building from scratch.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 3.3 Compile the Contract
|
||||
|
||||
1. Click **"Solidity Compiler"** in the left panel (hammer icon)
|
||||
2. Select compiler version **0.8.20** (or higher in 0.8.x)
|
||||
3. Click **"Compile MySimpleNFT.sol"**
|
||||
4. A green check ✅ means compilation succeeded
|
||||
|
||||
> If there is an error, check whether Solidity version matches and OpenZeppelin import path is correct. Remix automatically downloads OpenZeppelin dependencies from npm.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 3.4 Deploy Contract to Sepolia Testnet
|
||||
|
||||
1. Click **"Deploy & Run Transactions"** in the left panel (Ethereum icon)
|
||||
2. Set **Environment** to **"Injected Provider - MetaMask"**
|
||||
- This auto-connects your MetaMask wallet
|
||||
- MetaMask will pop up a connection request, click **"Connect"**
|
||||
3. Confirm network is **Sepolia (11155111)**
|
||||
4. Select **MySimpleNFT** in Contract dropdown
|
||||
5. Click **"Deploy"**
|
||||
6. MetaMask pops up a transaction confirmation; click **"Confirm"** (the gas fee is very low and is paid in free test ETH)
|
||||
|
||||
After a few seconds, when deployment succeeds, the **"Deployed Contracts"** section below will show your contract address. **Copy and save this address**; you will need it later.
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 4: Mint NFT and Verify Result (4 Minutes)
|
||||
|
||||
## 4.1 Mint Your First NFT
|
||||
|
||||
After successful deployment, in the **"Deployed Contracts"** section in Remix, you will see the contract interaction panel.
|
||||
|
||||
1. Expand the contract panel and find the **"mint"** button (orange)
|
||||
2. Click **"mint"** directly (no input parameters required)
|
||||
3. MetaMask pops up transaction confirmation, click **"Confirm"**
|
||||
4. Wait a few seconds for completion
|
||||
|
||||
Congratulations! You just minted NFT #0, and it now belongs to your wallet address.
|
||||
|
||||
You can continue clicking "mint" to create more. Token IDs auto-increment each time (#1, #2, #3...).
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 4.2 Verify Mint Result
|
||||
|
||||
**Method 1: Verify in Remix**
|
||||
|
||||
In the contract panel, find **"balanceOf"** (blue button), input your wallet address, and call it. If it returns `1` (or the number you minted), minting succeeded.
|
||||
|
||||
You can also call **"ownerOf"**, input `0` (token ID), and it returns your wallet address, proving NFT #0 belongs to you.
|
||||
|
||||
**Method 2: Verify on Etherscan (recommended)**
|
||||
|
||||
1. Open [Sepolia Etherscan](https://sepolia.etherscan.io/)
|
||||
2. Paste your **contract address** into search
|
||||
3. You will see the contract details page with all transaction records
|
||||
4. Click **"Token Tracker"** to view all NFTs minted by your contract
|
||||
|
||||
On Etherscan, every mint transaction has a complete record: who minted, when it was minted, and which token ID was created. This is what people mean when they call blockchain "public, transparent, and tamper-resistant."
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 5: Advanced - Add an Image to NFT (Optional)
|
||||
|
||||
The NFTs minted so far only have IDs, without image or description. To make NFTs complete, we need **IPFS (InterPlanetary File System)** to store images and metadata.
|
||||
|
||||
## 5.1 What Is IPFS?
|
||||
|
||||
IPFS is a decentralized file storage network. Unlike regular cloud storage, files on IPFS do not depend on one server, but are distributed across global nodes. This means:
|
||||
|
||||
* files are not lost if one server goes down
|
||||
* file content is identified by its hash, so any change to a file produces a different hash and tampering is easy to detect
|
||||
* it is ideal for storing NFT images and metadata
|
||||
|
||||
## 5.2 Upload Image to Pinata
|
||||
|
||||
[Pinata](https://pinata.cloud/) is the most popular IPFS storage service. The free tier provides 1GB storage, which is enough for us.
|
||||
|
||||
1. Visit https://pinata.cloud/ and register a free account
|
||||
2. After login, click **"Upload"** -> **"File"**
|
||||
3. Select the image you want as NFT artwork (AI-generated image is fine, or any image)
|
||||
4. After upload succeeds, copy the **CID** (a string like `QmXyz...`)
|
||||
|
||||
Your image URI is: `ipfs://yourCID`
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 5.3 Create Metadata JSON
|
||||
|
||||
NFT metadata is a JSON file describing NFT name, description, and image URI. Create a `metadata.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "Vibe Coder Certificate #0",
|
||||
"description": "This NFT certifies that the holder has completed the NFT minting tutorial and entered the world of Web3.",
|
||||
"image": "ipfs://your-image-cid",
|
||||
"attributes": [
|
||||
{ "trait_type": "Course", "value": "Easy Vibe" },
|
||||
{ "trait_type": "Skill", "value": "Smart Contract" },
|
||||
{ "trait_type": "Level", "value": "Beginner" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Upload `metadata.json` to Pinata too, and get a metadata CID.
|
||||
|
||||
## 5.4 Upgrade Contract to Support Images
|
||||
|
||||
To include images in NFTs, we need to slightly upgrade the contract by adding `tokenURI`. Go back to Remix and create a new file `MyNFTWithImage.sol`:
|
||||
|
||||
```solidity
|
||||
// SPDX-License-Identifier: MIT
|
||||
pragma solidity ^0.8.20;
|
||||
|
||||
import "@openzeppelin/contracts/token/ERC721/ERC721.sol";
|
||||
import "@openzeppelin/contracts/token/ERC721/extensions/ERC721URIStorage.sol";
|
||||
|
||||
contract MyNFTWithImage is ERC721, ERC721URIStorage {
|
||||
uint256 private _tokenId;
|
||||
|
||||
constructor() ERC721("VibeCoder", "VIBE") {}
|
||||
|
||||
// Pass metadata URI when minting
|
||||
function mint(string memory uri) public {
|
||||
_safeMint(msg.sender, _tokenId);
|
||||
_setTokenURI(_tokenId, uri);
|
||||
_tokenId++;
|
||||
}
|
||||
|
||||
// Overrides required by Solidity
|
||||
function tokenURI(uint256 tokenId)
|
||||
public view override(ERC721, ERC721URIStorage)
|
||||
returns (string memory)
|
||||
{
|
||||
return super.tokenURI(tokenId);
|
||||
}
|
||||
|
||||
function supportsInterface(bytes4 interfaceId)
|
||||
public view override(ERC721, ERC721URIStorage)
|
||||
returns (bool)
|
||||
{
|
||||
return super.supportsInterface(interfaceId);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
After deployment, call `mint` and pass your metadata URI, that is, `ipfs://` followed by the metadata CID you got in section 5.3 (for example `ipfs://QmAbc...`). Your minted NFT will then include the image and description.
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 6: Final Notes
|
||||
|
||||
Congratulations! You have completed a full NFT development loop from scratch. Let's recap:
|
||||
|
||||
1. Understood core concepts of NFTs and smart contracts
|
||||
2. Installed MetaMask and switched to Sepolia testnet
|
||||
3. Wrote an NFT smart contract with fewer than 15 lines in Remix IDE
|
||||
4. Deployed the contract to Ethereum testnet
|
||||
5. Minted your own NFT and verified it on Etherscan
|
||||
6. (Optional) Learned how to add image and metadata with IPFS
|
||||
|
||||
The whole process required no local environment installation, cost no money, and was completed fully in the browser. This is the appeal of blockchain development: the barrier is much lower than most people expect.
|
||||
|
||||
**Advanced directions:**
|
||||
|
||||
* **Use Hardhat / Foundry for local development**: when contract logic becomes complex, Remix is not enough. Hardhat and Foundry are professional local frameworks with automated testing, script-based deployment, gas optimization, and more
|
||||
* **Add whitelist and mint limits**: control who can mint, max mints per wallet, mint price, and similar rules
|
||||
* **Build a mint frontend**: use React + ethers.js / viem to build a polished mint page for one-click web minting
|
||||
* **Explore ERC1155 multi-edition NFTs**: ERC1155 allows multiple copies under one token ID, useful for game items and tickets
|
||||
* **Deploy to mainnet**: when ready, deploy to Ethereum mainnet (or L2 chains like Polygon or Base with lower gas fees)
|
||||
|
||||
***Your first NFT is already on-chain. The door to the blockchain world is now open.***
|
||||
|
||||
# References
|
||||
|
||||
* [OpenZeppelin ERC721 Docs](https://docs.openzeppelin.com/contracts/5.x/erc721)
|
||||
* [Remix IDE Official Docs](https://remix-ide.readthedocs.io/)
|
||||
* [MetaMask Official Docs](https://docs.metamask.io/)
|
||||
* [Solidity Official Docs](https://docs.soliditylang.org/)
|
||||
* [Sepolia Etherscan](https://sepolia.etherscan.io/)
|
||||
* [Pinata IPFS Storage Service](https://pinata.cloud/)
|
||||
* [ERC721 Standard Spec (EIP-721)](https://eips.ethereum.org/EIPS/eip-721)
|
||||
@@ -0,0 +1,891 @@
|
||||
# How to Build a VS Code Extension: Create Your AI Project Assistant
|
||||
|
||||
# Chapter 1: What VS Code Extension Development Is
|
||||
|
||||
In this tutorial, we will go end to end: build a VS Code extension from scratch that acts as your AI project assistant, with one-click project template generation, AI chat on selected files or code snippets, multi-file Q&A analysis, and custom shortcuts. You will develop and debug the extension, then learn how to publish it to the VS Code Marketplace.
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
- Node.js environment (version 18.0+)
|
||||
- VS Code editor (version 1.90+)
|
||||
- Your AI coding assistant (Cursor / Trae / Claude Code)
|
||||
- (Optional) GitHub Copilot subscription (for Language Model API)
|
||||
|
||||
> **Vibe Coding end-to-end**: we will use an AI coding assistant to generate most code. You only need to understand core concepts and architecture, then describe requirements in natural language.
|
||||
|
||||
## 1.1 What Can VS Code Extensions Do?
|
||||
|
||||
You already use VS Code extensions daily. Prettier formats your code, GitLens shows Git history, and GitHub Copilot helps you write code. These extensions are essentially programs written in TypeScript/JavaScript that extend the editor through VS Code APIs.
|
||||
|
||||
VS Code extensions can do much more than many people expect:
|
||||
|
||||
* **Add new UI elements**: sidebar panels, status bar info, custom Webview pages
|
||||
* **Handle files and code**: read, modify, and create files; analyze code structure
|
||||
* **Integrate external services**: call APIs, connect databases, integrate CI/CD
|
||||
* **Extend editor capabilities**: custom language support, code completion, diagnostics
|
||||
* **Add AI capabilities**: create AI assistants with Chat Participant API, call models with Language Model API
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.2 Core Architecture of a VS Code Extension
|
||||
|
||||
A VS Code extension runs in an isolated **Extension Host** process, separate from the editor main process. This means even if an extension crashes, the editor itself is not affected.
|
||||
|
||||
A typical extension has these core parts:
|
||||
|
||||
* **package.json (manifest)**: extension "ID card," declaring name, entry file, contribution points (`commands`, `menus`, `keybindings`, etc.)
|
||||
* **extension.ts (entry file)**: extension "brain," exporting `activate()` and `deactivate()`
|
||||
* **Contribution Points**: what your extension contributes to VS Code in package.json (commands, menu items, keybindings, views, etc.)
|
||||
* **VS Code API**: the TypeScript API set used to operate editor capabilities
|
||||
|
||||
```text
|
||||
VS Code editor
|
||||
│
|
||||
├── Extension Host (extension process)
|
||||
│ ├── Your extension
|
||||
│ │ ├── package.json -> declares "what I can do"
|
||||
│ │ ├── extension.ts -> implements "how to do it"
|
||||
│ │ └── other modules -> concrete feature code
|
||||
│ ├── Other extension A
|
||||
│ └── Other extension B
|
||||
│
|
||||
└── Editor main process (UI rendering)
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.3 What Extension Are We Building?
|
||||
|
||||
We will build a VS Code extension named **"AI Project Bot"**, an AI project assistant with the following features:
|
||||
|
||||
| Feature | Description |
|
||||
|------|------|
|
||||
| Project templates | Sidebar list of templates, one-click project scaffold generation |
|
||||
| AI chat | `@project-bot` participant in VS Code Chat for project Q&A |
|
||||
| File/snippet chat | Right-click selected code or file and send to AI for analysis/explanation/refactoring |
|
||||
| Multi-file Q&A | Multi-select files in explorer and ask AI to analyze relationships and logic |
|
||||
| Shortcuts | Custom keybindings to trigger common actions quickly |
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.4 Tutorial Roadmap
|
||||
|
||||
We will complete the flow in these steps:
|
||||
|
||||
1. **Create extension project** (3 minutes): scaffold project and understand core files
|
||||
2. **Implement project templates** (5 minutes): use TreeView to show templates in sidebar and generate projects
|
||||
3. **Implement AI Chat participant** (5 minutes): create `@project-bot` via Chat Participant API
|
||||
4. **Implement file/snippet chat and multi-file Q&A** (5 minutes): right-click menus + multi-select analysis
|
||||
5. **Add shortcuts and UX polish** (3 minutes): keybindings and status bar hints
|
||||
6. **Publish to marketplace** (optional): package and submit
|
||||
|
||||
# Chapter 2: Create the Extension Project (3 Minutes)
|
||||
|
||||
## 2.1 Generate Project with Scaffold
|
||||
|
||||
VS Code officially provides a Yeoman scaffold tool. Ask AI to run:
|
||||
|
||||
```text
|
||||
Please help me install VS Code extension scaffolding tools and create a project:
|
||||
1. Install Yeoman and generator-code: npm install -g yo generator-code
|
||||
2. Run yo code and choose:
|
||||
- Type: New Extension (TypeScript)
|
||||
- Name: ai-project-bot
|
||||
- Identifier: ai-project-bot
|
||||
- Description: AI project assistant - template generation, intelligent chat, multi-file Q&A
|
||||
- Package manager: npm
|
||||
3. Enter project directory and install dependencies
|
||||
```
|
||||
|
||||
Generated structure:
|
||||
|
||||
```text
|
||||
ai-project-bot/
|
||||
├── .vscode/
|
||||
│ ├── launch.json # Debug config (F5 starts debugging)
|
||||
│ └── tasks.json # Build tasks
|
||||
├── src/
|
||||
│ └── extension.ts # Extension entry file
|
||||
├── package.json # Extension manifest (most important file)
|
||||
├── tsconfig.json # TypeScript config
|
||||
└── vsc-extension-quickstart.md # Quick start guide (can be removed)
|
||||
```
|
||||
|
||||
## 2.2 Understand package.json: The Extension "ID Card"
|
||||
|
||||
`package.json` is the core file of a VS Code extension. Besides normal npm fields, it has `contributes` to declare everything your extension contributes to VS Code:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "ai-project-bot",
|
||||
"displayName": "AI Project Bot",
|
||||
"description": "AI project assistant - template generation, intelligent chat, multi-file Q&A",
|
||||
"version": "0.0.1",
|
||||
"engines": { "vscode": "^1.90.0" },
|
||||
"activationEvents": [],
|
||||
"main": "./out/extension.js",
|
||||
"contributes": {
|
||||
"commands": [],
|
||||
"menus": {},
|
||||
"keybindings": [],
|
||||
"viewsContainers": {},
|
||||
"views": {},
|
||||
"chatParticipants": []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Key fields:**
|
||||
|
||||
| Field | Purpose |
|
||||
|------|------|
|
||||
| `engines.vscode` | Minimum supported VS Code version |
|
||||
| `activationEvents` | When the extension activates (when left empty, VS Code derives activation from the declared contributions, such as commands and views) |
|
||||
| `main` | Path to compiled entry file |
|
||||
| `contributes` | All contributed features (commands, menus, keybindings, views, etc.) |
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 2.3 Understand extension.ts: The Extension "Brain"
|
||||
|
||||
Open `src/extension.ts` and you will see two core functions:
|
||||
|
||||
```typescript
|
||||
import * as vscode from 'vscode'
|
||||
|
||||
// Called when extension is activated (first command execution, opening specific files, etc.)
|
||||
export function activate(context: vscode.ExtensionContext) {
|
||||
console.log('AI Project Bot activated!')
|
||||
|
||||
// Register commands, views, chat participants, etc.
|
||||
const disposable = vscode.commands.registerCommand(
|
||||
'ai-project-bot.helloWorld',
|
||||
() => {
|
||||
vscode.window.showInformationMessage('Hello from AI Project Bot!')
|
||||
}
|
||||
)
|
||||
|
||||
context.subscriptions.push(disposable)
|
||||
}
|
||||
|
||||
// Called when extension is deactivated (for example when VS Code closes)
|
||||
export function deactivate() {}
|
||||
```
|
||||
|
||||
**Core concepts:**
|
||||
|
||||
* `activate(context)`: extension initialization, register all capabilities here
|
||||
* `context.subscriptions`: an auto-cleanup list; VS Code disposes registered items on deactivation
|
||||
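* `vscode.commands.registerCommand`: registers the handler for a command; once the command is also declared under `contributes.commands` in `package.json`, it can be run from the Command Palette (`Ctrl+Shift+P`)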
|
||||
|
||||
## 2.4 Start Debugging
|
||||
|
||||
Press **F5**, and VS Code opens a new **Extension Development Host** window. This is a fresh VS Code instance with your extension loaded.
|
||||
|
||||
In the new window, press **Ctrl+Shift+P**, type "Hello World," and you will see a message popup. This means your extension is running.
|
||||
|
||||
<!--  -->
|
||||
|
||||
> **Debug tip**: after code changes, in Extension Development Host press **Ctrl+Shift+P** -> **Developer: Reload Window** to reload extension quickly.
|
||||
|
||||
# Chapter 3: Implement Project Templates (5 Minutes)
|
||||
|
||||
## 3.1 Design Template System
|
||||
|
||||
We want to add a "Project Templates" panel to the VS Code sidebar where users can browse templates and generate project skeletons with one click. This uses the VS Code **TreeView API**.
|
||||
|
||||
Ask AI to implement:
|
||||
|
||||
```text
|
||||
Please help me implement project templates in ai-project-bot:
|
||||
|
||||
1. Add contribution points in package.json:
|
||||
- Add a new viewsContainers.activitybar item with id "project-bot", title "AI Project Bot"
|
||||
- Add a view under it with id "projectTemplates", name "Project Templates"
|
||||
- Add command "ai-project-bot.createFromTemplate", title "Create Project from Template"
|
||||
|
||||
2. Create src/templates/templateProvider.ts:
|
||||
- Implement TreeDataProvider with template categories and templates:
|
||||
- Frontend: React + TypeScript, Vue 3 + TypeScript, Next.js App
|
||||
- Backend: Express API, FastAPI Python
|
||||
- Full-stack: T3 Stack (Next.js + tRPC + Prisma)
|
||||
- Each template item shows name, description, and icon
|
||||
|
||||
3. Create src/templates/scaffolder.ts:
|
||||
- Implement createProjectFromTemplate function
|
||||
- Let users choose target folder
|
||||
- Generate project structure by template type
|
||||
```
|
||||
|
||||
## 3.2 Declare View in package.json
|
||||
|
||||
First add sidebar view contributions in `package.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"contributes": {
|
||||
"viewsContainers": {
|
||||
"activitybar": [
|
||||
{
|
||||
"id": "project-bot",
|
||||
"title": "AI Project Bot",
|
||||
"icon": "resources/bot-icon.svg"
|
||||
}
|
||||
]
|
||||
},
|
||||
"views": {
|
||||
"project-bot": [
|
||||
{
|
||||
"id": "projectTemplates",
|
||||
"name": "Project Templates"
|
||||
}
|
||||
]
|
||||
},
|
||||
"commands": [
|
||||
{
|
||||
"command": "ai-project-bot.createFromTemplate",
|
||||
"title": "Create Project from Template",
|
||||
"icon": "$(add)"
|
||||
}
|
||||
],
|
||||
"menus": {
|
||||
"view/title": [
|
||||
{
|
||||
"command": "ai-project-bot.createFromTemplate",
|
||||
"when": "view == projectTemplates",
|
||||
"group": "navigation"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This config does three things:
|
||||
|
||||
1. Adds an "AI Project Bot" icon entry in the activity bar
|
||||
2. Creates a "Project Templates" view under that entry
|
||||
3. Adds a "+" button in the view title bar for project creation
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 3.3 Implement TreeDataProvider
|
||||
|
||||
TreeDataProvider is the interface VS Code uses to fill tree data. We need `getTreeItem` (display info for one node) and `getChildren` (child node list).
|
||||
|
||||
Core code:
|
||||
|
||||
```typescript
|
||||
// src/templates/templateProvider.ts
|
||||
import * as vscode from 'vscode'
|
||||
|
||||
interface Template {
|
||||
name: string
|
||||
description: string
|
||||
category: string
|
||||
command: string // command to generate project, for example "npx create-react-app"
|
||||
}
|
||||
|
||||
const TEMPLATES: Template[] = [
|
||||
{ name: 'React + TypeScript', description: 'React project built with Vite', category: 'Frontend', command: 'npm create vite@latest {{name}} -- --template react-ts' },
|
||||
{ name: 'Vue 3 + TypeScript', description: 'Vue 3 project built with Vite', category: 'Frontend', command: 'npm create vite@latest {{name}} -- --template vue-ts' },
|
||||
{ name: 'Next.js App', description: 'Next.js App Router full-stack project', category: 'Frontend', command: 'npx create-next-app@latest {{name}} --typescript --app' },
|
||||
{ name: 'Express API', description: 'Express + TypeScript REST API', category: 'Backend', command: 'npx create-express-api {{name}}' },
|
||||
{ name: 'FastAPI Python', description: 'Python FastAPI backend project', category: 'Backend', command: 'pip install fastapi uvicorn' },
|
||||
]
|
||||
|
||||
// Tree node: category or template
|
||||
class TemplateItem extends vscode.TreeItem {
|
||||
constructor(
|
||||
public readonly label: string,
|
||||
public readonly collapsibleState: vscode.TreeItemCollapsibleState,
|
||||
public readonly template?: Template
|
||||
) {
|
||||
super(label, collapsibleState)
|
||||
if (template) {
|
||||
this.description = template.description
|
||||
this.tooltip = `${template.name}\n${template.description}\nCommand: ${template.command}`
|
||||
this.contextValue = 'template'
|
||||
this.command = {
|
||||
command: 'ai-project-bot.createFromTemplate',
|
||||
title: 'Create Project',
|
||||
arguments: [template]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class TemplateProvider implements vscode.TreeDataProvider<TemplateItem> {
|
||||
getTreeItem(element: TemplateItem): vscode.TreeItem {
|
||||
return element
|
||||
}
|
||||
|
||||
getChildren(element?: TemplateItem): TemplateItem[] {
|
||||
if (!element) {
|
||||
// Root: return category list
|
||||
const categories = [...new Set(TEMPLATES.map(t => t.category))]
|
||||
return categories.map(
|
||||
cat => new TemplateItem(cat, vscode.TreeItemCollapsibleState.Expanded)
|
||||
)
|
||||
}
|
||||
// Children: templates in category
|
||||
return TEMPLATES
|
||||
.filter(t => t.category === element.label)
|
||||
.map(t => new TemplateItem(t.name, vscode.TreeItemCollapsibleState.None, t))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 3.4 Register View and Create Command
|
||||
|
||||
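Register the TreeView and the project creation command in `extension.ts`. The command handler below also reads the `TEMPLATES` array, so export that constant from `templateProvider.ts` and import it together with `TemplateProvider`: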
|
||||
|
||||
```typescript
|
||||
// src/extension.ts
|
||||
import { TemplateProvider } from './templates/templateProvider'
|
||||
|
||||
export function activate(context: vscode.ExtensionContext) {
|
||||
// Register template view
|
||||
const templateProvider = new TemplateProvider()
|
||||
vscode.window.registerTreeDataProvider('projectTemplates', templateProvider)
|
||||
|
||||
// Register create project command
|
||||
const createCmd = vscode.commands.registerCommand(
|
||||
'ai-project-bot.createFromTemplate',
|
||||
async (template) => {
|
||||
if (!template) {
|
||||
// If no template passed (called from command palette), let user pick
|
||||
const pick = await vscode.window.showQuickPick(
|
||||
TEMPLATES.map(t => ({ label: t.name, description: t.description, template: t })),
|
||||
{ placeHolder: 'Choose a project template' }
|
||||
)
|
||||
if (!pick) return
|
||||
template = pick.template
|
||||
}
|
||||
|
||||
// Ask for project name
|
||||
const name = await vscode.window.showInputBox({
|
||||
prompt: 'Enter project name',
|
||||
placeHolder: 'my-awesome-project'
|
||||
})
|
||||
if (!name) return
|
||||
|
||||
// Ask for target folder
|
||||
const folder = await vscode.window.showOpenDialog({
|
||||
canSelectFolders: true,
|
||||
openLabel: 'Select target folder'
|
||||
})
|
||||
if (!folder) return
|
||||
|
||||
// Execute creation command
|
||||
const terminal = vscode.window.createTerminal('AI Project Bot')
|
||||
terminal.show()
|
||||
const cmd = template.command.replace('{{name}}', name)
|
||||
terminal.sendText(`cd "${folder[0].fsPath}" && ${cmd}`)
|
||||
|
||||
vscode.window.showInformationMessage(`Creating ${template.name} project: ${name}`)
|
||||
}
|
||||
)
|
||||
|
||||
context.subscriptions.push(createCmd)
|
||||
}
|
||||
```
|
||||
|
||||
Now press F5 to start debugging. You will see AI Project Bot in the activity bar. Expand the template list and click any template to create a project.
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 4: Implement AI Chat Participant (5 Minutes)
|
||||
|
||||
## 4.1 What Is Chat Participant API?
|
||||
|
||||
Starting from VS Code 1.90, extensions can create their own AI assistant in the Chat panel using the **Chat Participant API**. When a user types `@project-bot help me analyze this project architecture`, your extension receives the message and returns a model-generated response.
|
||||
|
||||
Core concepts:
|
||||
|
||||
* **Participant**: your assistant identity in Chat panel, invoked with `@name`
|
||||
* **Slash Commands**: quick commands supported by participant, such as `/explain`, `/refactor`
|
||||
* **Language Model API**: call built-in models in VS Code (for example Copilot GPT-4o)
|
||||
* **Stream**: progressively output responses through `stream.markdown()`
|
||||
|
||||
## 4.2 Declare Chat Participant in package.json
|
||||
|
||||
Add this in `contributes`:
|
||||
|
||||
```json
|
||||
{
|
||||
"contributes": {
|
||||
"chatParticipants": [
|
||||
{
|
||||
"id": "ai-project-bot.projectBot",
|
||||
"name": "project-bot",
|
||||
"fullName": "AI Project Bot",
|
||||
"description": "Your AI project assistant for code analysis, architecture explanation, and solution generation",
|
||||
"isSticky": true
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`isSticky: true` means once selected, follow-up messages go to this participant by default, without typing `@project-bot` each time.
|
||||
|
||||
## 4.3 Implement Chat Participant Handler
|
||||
|
||||
Ask AI to write core logic:
|
||||
|
||||
```text
|
||||
Please help me create src/chat/chatParticipant.ts and implement Chat Participant:
|
||||
1. Register participant "ai-project-bot.projectBot"
|
||||
2. Support three slash commands:
|
||||
- /explain: explain selected code or current file
|
||||
- /refactor: provide refactoring suggestions
|
||||
- /template: recommend suitable tech stack templates
|
||||
3. Use Language Model API with VS Code built-in model
|
||||
4. Return response in streaming mode (stream.markdown)
|
||||
```
|
||||
|
||||
Core code:
|
||||
|
||||
```typescript
|
||||
// src/chat/chatParticipant.ts
|
||||
import * as vscode from 'vscode'
|
||||
|
||||
export function registerChatParticipant(context: vscode.ExtensionContext) {
|
||||
const participant = vscode.chat.createChatParticipant(
|
||||
'ai-project-bot.projectBot',
|
||||
async (request, chatContext, stream, token) => {
|
||||
// Select available model
|
||||
const models = await vscode.lm.selectChatModels({ family: 'gpt-4o' })
|
||||
const model = models[0]
|
||||
|
||||
if (!model) {
|
||||
stream.markdown('No language model available. Please make sure GitHub Copilot is installed.')
|
||||
return
|
||||
}
|
||||
|
||||
// Build system prompt by slash command
|
||||
let systemPrompt = 'You are a professional project development assistant.'
|
||||
|
||||
if (request.command === 'explain') {
|
||||
systemPrompt = 'You are a code explanation expert. Please explain user code in concise Chinese, including purpose, logic flow, and key design decisions.'
|
||||
} else if (request.command === 'refactor') {
|
||||
systemPrompt = 'You are a code refactoring expert. Analyze user code and provide specific refactoring suggestions with improved code examples.'
|
||||
} else if (request.command === 'template') {
|
||||
systemPrompt = 'You are a tech stack selection expert. Recommend suitable tech stacks and project templates based on user requirements.'
|
||||
}
|
||||
|
||||
// Build messages
|
||||
const messages = [
|
||||
vscode.LanguageModelChatMessage.User(systemPrompt),
|
||||
vscode.LanguageModelChatMessage.User(request.prompt)
|
||||
]
|
||||
|
||||
// Stream output
|
||||
const response = await model.sendRequest(messages, {}, token)
|
||||
for await (const chunk of response.stream) {
|
||||
stream.markdown(chunk)
|
||||
}
|
||||
|
||||
return { metadata: { command: request.command || '' } }
|
||||
}
|
||||
)
|
||||
|
||||
// Register slash commands
|
||||
participant.slashCommandProvider = {
|
||||
provideSlashCommands: () => [
|
||||
{ name: 'explain', description: 'Explain code function and logic' },
|
||||
{ name: 'refactor', description: 'Provide refactoring suggestions and improvements' },
|
||||
{ name: 'template', description: 'Recommend suitable project templates and tech stacks' }
|
||||
]
|
||||
}
|
||||
|
||||
// Register follow-up suggestions
|
||||
participant.followupProvider = {
|
||||
provideFollowups: (result) => {
|
||||
if (result.metadata?.command === 'explain') {
|
||||
return [
|
||||
{ prompt: 'Can you draw a flowchart?', label: 'Generate flowchart' },
|
||||
{ prompt: 'Any potential bugs here?', label: 'Check potential issues' }
|
||||
]
|
||||
}
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
context.subscriptions.push(participant)
|
||||
}
|
||||
```
|
||||
|
||||
Call registration in `extension.ts`:
|
||||
|
||||
```typescript
|
||||
import { registerChatParticipant } from './chat/chatParticipant'
|
||||
|
||||
export function activate(context: vscode.ExtensionContext) {
|
||||
// ... previous template registration code ...
|
||||
registerChatParticipant(context)
|
||||
}
|
||||
```
|
||||
|
||||
Now type `@project-bot /explain what does this code do?` in the Chat panel, and your extension will call the model and generate an explanation.
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 5: File/Snippet Chat and Multi-file Q&A (5 Minutes)
|
||||
|
||||
## 5.1 Right-click Menu: Send Selected Code to AI
|
||||
|
||||
We want users to be able to select code in the editor and send it to the AI from the context menu. This uses the VS Code **context menu** contribution points.
|
||||
|
||||
Add in `package.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"contributes": {
|
||||
"commands": [
|
||||
{
|
||||
"command": "ai-project-bot.explainSelection",
|
||||
"title": "AI: Explain Selected Code"
|
||||
},
|
||||
{
|
||||
"command": "ai-project-bot.refactorSelection",
|
||||
"title": "AI: Refactor Selected Code"
|
||||
}
|
||||
],
|
||||
"menus": {
|
||||
"editor/context": [
|
||||
{
|
||||
"command": "ai-project-bot.explainSelection",
|
||||
"when": "editorHasSelection",
|
||||
"group": "ai-project-bot@1"
|
||||
},
|
||||
{
|
||||
"command": "ai-project-bot.refactorSelection",
|
||||
"when": "editorHasSelection",
|
||||
"group": "ai-project-bot@2"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Key config notes:**
|
||||
|
||||
* `when: "editorHasSelection"`: show menu only when text is selected
|
||||
* `group: "ai-project-bot@1"`: menu grouping and order (`@1`, `@2`)
|
||||
|
||||
## 5.2 Implement Selected-code Analysis
|
||||
|
||||
```typescript
|
||||
// src/commands/selectionCommands.ts
|
||||
import * as vscode from 'vscode'
|
||||
|
||||
export function registerSelectionCommands(context: vscode.ExtensionContext) {
|
||||
// Explain selected code
|
||||
const explainCmd = vscode.commands.registerCommand(
|
||||
'ai-project-bot.explainSelection',
|
||||
async () => {
|
||||
const editor = vscode.window.activeTextEditor
|
||||
if (!editor) return
|
||||
|
||||
const selection = editor.selection
|
||||
const selectedText = editor.document.getText(selection)
|
||||
const fileName = editor.document.fileName.split('/').pop()
|
||||
const startLine = selection.start.line + 1
|
||||
const endLine = selection.end.line + 1
|
||||
|
||||
// Build prompt with context
|
||||
const prompt = [
|
||||
`Please explain the following code (from ${fileName}, lines ${startLine}-${endLine}):`,
|
||||
'```',
|
||||
selectedText,
|
||||
'```',
|
||||
'Please explain: 1) what this code does 2) core logic 3) possible improvements'
|
||||
].join('\n')
|
||||
|
||||
// Call Language Model API
|
||||
const models = await vscode.lm.selectChatModels({ family: 'gpt-4o' })
|
||||
if (!models.length) {
|
||||
vscode.window.showErrorMessage('No language model available')
|
||||
return
|
||||
}
|
||||
|
||||
// Show results in output panel
|
||||
const outputChannel = vscode.window.createOutputChannel('AI Project Bot')
|
||||
outputChannel.show()
|
||||
outputChannel.appendLine(`\n--- Code Explanation (${fileName}:${startLine}-${endLine}) ---\n`)
|
||||
|
||||
const messages = [
|
||||
vscode.LanguageModelChatMessage.User(prompt)
|
||||
]
|
||||
const response = await models[0].sendRequest(messages, {})
|
||||
for await (const chunk of response.stream) {
|
||||
outputChannel.append(chunk)
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
context.subscriptions.push(explainCmd)
|
||||
}
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 5.3 Multi-file Q&A: Batch Analyze File Relationships
|
||||
|
||||
This is one of the most powerful features: multi-select files in the Explorer and let the AI analyze their relationships and logic in one click.
|
||||
|
||||
Add explorer context menu in `package.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"contributes": {
|
||||
"commands": [
|
||||
{
|
||||
"command": "ai-project-bot.analyzeFiles",
|
||||
"title": "AI: Analyze Relationships of Selected Files"
|
||||
}
|
||||
],
|
||||
"menus": {
|
||||
"explorer/context": [
|
||||
{
|
||||
"command": "ai-project-bot.analyzeFiles",
|
||||
"when": "explorerResourceIsFile",
|
||||
"group": "ai-project-bot"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Implement multi-file analysis command:
|
||||
|
||||
```typescript
|
||||
// src/commands/multiFileAnalysis.ts
|
||||
import * as vscode from 'vscode'
|
||||
|
||||
export function registerMultiFileCommands(context: vscode.ExtensionContext) {
|
||||
const analyzeCmd = vscode.commands.registerCommand(
|
||||
'ai-project-bot.analyzeFiles',
|
||||
async (clickedFile: vscode.Uri, selectedFiles: vscode.Uri[]) => {
|
||||
// selectedFiles contains all selected files
|
||||
const files = selectedFiles || [clickedFile]
|
||||
|
||||
if (files.length < 2) {
|
||||
vscode.window.showWarningMessage('Please select at least 2 files for analysis')
|
||||
return
|
||||
}
|
||||
|
||||
// Read all selected files
|
||||
const fileContents: string[] = []
|
||||
for (const file of files) {
|
||||
const content = await vscode.workspace.fs.readFile(file)
|
||||
const fileName = vscode.workspace.asRelativePath(file)
|
||||
fileContents.push(
|
||||
`--- ${fileName} ---\n${Buffer.from(content).toString('utf8')}`
|
||||
)
|
||||
}
|
||||
|
||||
const prompt = [
|
||||
`Please analyze relationships among these ${files.length} files:`,
|
||||
'',
|
||||
...fileContents,
|
||||
'',
|
||||
'Please explain:',
|
||||
'1. Responsibilities of each file',
|
||||
'2. Dependency/call relationships among them',
|
||||
'3. Data flow (if any)',
|
||||
'4. Architectural suggestions or potential issues'
|
||||
].join('\n')
|
||||
|
||||
// Call model and show result
|
||||
const models = await vscode.lm.selectChatModels({ family: 'gpt-4o' })
|
||||
if (!models.length) {
|
||||
vscode.window.showErrorMessage('No language model available')
|
||||
return
|
||||
}
|
||||
|
||||
const outputChannel = vscode.window.createOutputChannel('AI Project Bot')
|
||||
outputChannel.show()
|
||||
outputChannel.appendLine(`\n--- Multi-file Analysis (${files.length} files) ---\n`)
|
||||
|
||||
const messages = [
|
||||
vscode.LanguageModelChatMessage.User(prompt)
|
||||
]
|
||||
const response = await models[0].sendRequest(messages, {})
|
||||
for await (const chunk of response.stream) {
|
||||
outputChannel.append(chunk)
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
context.subscriptions.push(analyzeCmd)
|
||||
}
|
||||
```
|
||||
|
||||
Usage: in the Explorer, hold `Ctrl` (`Cmd` on Mac) to multi-select files, then right-click and choose "AI: Analyze Relationships of Selected Files." The AI reads all selected files and returns its analysis.
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 6: Shortcuts and UX Optimization (3 Minutes)
|
||||
|
||||
## 6.1 Custom Keybindings
|
||||
|
||||
Shortcuts are key to efficiency. Add in `package.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"contributes": {
|
||||
"keybindings": [
|
||||
{
|
||||
"command": "ai-project-bot.explainSelection",
|
||||
"key": "ctrl+shift+e",
|
||||
"mac": "cmd+shift+e",
|
||||
"when": "editorTextFocus && editorHasSelection"
|
||||
},
|
||||
{
|
||||
"command": "ai-project-bot.refactorSelection",
|
||||
"key": "ctrl+shift+r",
|
||||
"mac": "cmd+shift+r",
|
||||
"when": "editorTextFocus && editorHasSelection"
|
||||
},
|
||||
{
|
||||
"command": "ai-project-bot.createFromTemplate",
|
||||
"key": "ctrl+shift+n",
|
||||
"mac": "cmd+shift+n",
|
||||
"when": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**`when` conditions:**
|
||||
|
||||
| Condition | Meaning |
|
||||
|------|------|
|
||||
| `editorTextFocus` | Cursor is in editor |
|
||||
| `editorHasSelection` | Some text is selected |
|
||||
| `explorerViewletVisible` | Explorer panel is visible |
|
||||
| `!editorReadonly` | File is not read-only |
|
||||
|
||||
When multiple conditions are joined with `&&`, all of them must be satisfied.
|
||||
|
||||
## 6.2 Status Bar Hint
|
||||
|
||||
Add a status bar entry so users can always see that the extension is running:
|
||||
|
||||
```typescript
|
||||
// src/statusBar.ts
|
||||
import * as vscode from 'vscode'
|
||||
|
||||
export function createStatusBarItem(context: vscode.ExtensionContext) {
|
||||
const statusBar = vscode.window.createStatusBarItem(
|
||||
vscode.StatusBarAlignment.Right,
|
||||
100
|
||||
)
|
||||
statusBar.text = '$(hubot) AI Bot'
|
||||
statusBar.tooltip = 'Click to open AI Project Bot'
|
||||
statusBar.command = 'ai-project-bot.createFromTemplate'
|
||||
statusBar.show()
|
||||
|
||||
context.subscriptions.push(statusBar)
|
||||
}
|
||||
```
|
||||
|
||||
`$(hubot)` uses VS Code's built-in icon syntax. You can find all available icons in the [Codicon library](https://microsoft.github.io/vscode-codicons/dist/codicon.html).
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 7: Publish to Marketplace (Optional)
|
||||
|
||||
## 7.1 Prepare for Publishing
|
||||
|
||||
VS Code extensions are packaged and published with **vsce**:
|
||||
|
||||
```text
|
||||
Please help me install vsce: npm install -g @vscode/vsce
|
||||
```
|
||||
|
||||
Before publishing, prepare:
|
||||
|
||||
1. **Azure DevOps account**: register and create an organization at [dev.azure.com](https://dev.azure.com/)
|
||||
2. **Personal Access Token (PAT)**: create in Azure DevOps with permission **Marketplace -> Manage**
|
||||
3. **Publisher ID**: create publisher identity in [VS Code Marketplace](https://marketplace.visualstudio.com/manage)
|
||||
|
||||
## 7.2 Improve package.json Metadata
|
||||
|
||||
Add metadata before publishing:
|
||||
|
||||
```json
|
||||
{
|
||||
"publisher": "your-publisher-id",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/yourname/ai-project-bot"
|
||||
},
|
||||
"categories": ["AI", "Other"],
|
||||
"keywords": ["ai", "project", "template", "chat"],
|
||||
"icon": "resources/icon.png",
|
||||
"galleryBanner": {
|
||||
"color": "#1e1e2e",
|
||||
"theme": "dark"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
You also need a `README.md` for marketplace description and a `CHANGELOG.md` for version history.
|
||||
|
||||
## 7.3 Package and Publish
|
||||
|
||||
```bash
|
||||
# Package to .vsix (manual install file)
|
||||
vsce package
|
||||
|
||||
# Publish to marketplace
|
||||
vsce publish
|
||||
```
|
||||
|
||||
After packaging, you get `ai-project-bot-0.0.1.vsix`. You can send this file to friends and they can install via VS Code "Install from VSIX."
|
||||
|
||||
For official marketplace publishing, run `vsce publish`; the extension usually appears within minutes.
|
||||
|
||||
<!--  -->
|
||||
|
||||
> **Tip**: first release may require review. Make sure README is clear and screenshots are complete to speed up approval.
|
||||
|
||||
# Chapter 8: Final Notes
|
||||
|
||||
Congratulations! You have built a fully functional VS Code extension from scratch. Recap:
|
||||
|
||||
1. Created extension project with Yeoman scaffold and understood roles of `package.json` and `extension.ts`
|
||||
2. Implemented sidebar project template list with TreeView API and one-click project creation
|
||||
3. Created `@project-bot` AI assistant with Chat Participant API, including slash commands and streaming responses
|
||||
4. Implemented right-click code selection analysis
|
||||
5. Implemented multi-file relationship analysis
|
||||
6. Added custom shortcuts and status bar hint
|
||||
|
||||
The possibilities of VS Code extension development are vast. The technology behind the useful extensions you use every day is exactly what you just learned.
|
||||
|
||||
**Advanced directions:**
|
||||
|
||||
* **Custom Webview panels**: build fully custom UI with HTML/CSS/JS, such as visual architecture graphs and interactive code review interfaces
|
||||
* **Language Model Tools**: register custom tools callable by AI, such as querying database or executing API requests
|
||||
* **Diagnostics and CodeLens**: show AI suggestions, performance hints, and security warnings inline
|
||||
* **Custom language support**: provide syntax highlighting, completion, and diagnostics for DSLs or specific config formats
|
||||
* **Remote development integration**: make extension work in SSH, containers, and WSL
|
||||
|
||||
***Your editor, your rules.***
|
||||
|
||||
# References
|
||||
|
||||
* [VS Code Extension API Docs](https://code.visualstudio.com/api)
|
||||
* [Chat Participant API Guide](https://code.visualstudio.com/api/extension-guides/chat)
|
||||
* [Language Model API Guide](https://code.visualstudio.com/api/extension-guides/language-model)
|
||||
* [TreeView API Guide](https://code.visualstudio.com/api/extension-guides/tree-view)
|
||||
* [Webview API Guide](https://code.visualstudio.com/api/extension-guides/webview)
|
||||
* [VS Code Extension Publishing Guide](https://code.visualstudio.com/api/working-with-extensions/publishing-extension)
|
||||
* [Codicon Icon Library](https://microsoft.github.io/vscode-codicons/dist/codicon.html)
|
||||
@@ -0,0 +1,693 @@
|
||||
# How to Build an Industrial Qt Desktop App: Pump Monitoring HMI System
|
||||
|
||||
# Chapter 1: What Industrial HMI and Qt Development Are
|
||||
|
||||
In this tutorial, we will complete a full closed loop: build an industrial-grade pump monitoring HMI (Human-Machine Interface) system from scratch with Qt. It can read sensor data in real time, draw pressure trend charts, trigger automatic over-threshold alarms, and record fault logs. The whole process uses free simulation software on a PC instead of real industrial hardware.
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
- A computer (Windows or Mac, Windows recommended for better industrial software compatibility)
|
||||
- Qt 6.5 development environment (Qt Creator + Qt Serial Bus + Qt Charts modules)
|
||||
- Modbus Slave simulation software (free download, works as a "virtual pump")
|
||||
- Your AI coding assistant (Cursor / Trae / Claude Code)
|
||||
|
||||
> **Zero hardware, zero cost**: use free PC simulation software (Modbus Slave) as the lower-level device; no need to buy hardware. Use official Qt `QModbusTcpClient` + Qt Charts modules directly, no manual protocol parsing needed. After running, you will see real-time pressure trends, over-threshold alarm popups, and fault logs, matching real factory workflow.
|
||||
|
||||
## 1.1 What Are Upper Computer and Lower Computer?
|
||||
|
||||
In industrial automation, there are two concepts you must understand: **upper computer** and **lower computer**.
|
||||
|
||||
**Lower Computer**: the "hands and feet" on-site
|
||||
|
||||
The lower computer is the controller that directly interacts with physical devices. In factories, it is usually a **PLC (Programmable Logic Controller)** or **sensor**, responsible for:
|
||||
|
||||
* reading field data (temperature, pressure, flow, liquid level, etc.)
|
||||
* controlling device actions (start pump, close valve, adjust speed, etc.)
|
||||
* running predefined logic automatically (for example stop pump when pressure exceeds threshold)
|
||||
|
||||
You can think of the lower computer as a "worker" on the factory floor. It does not need complex thinking, but must execute tasks reliably.
|
||||
|
||||
**Upper Computer**: the "eyes and brain" in the control room
|
||||
|
||||
The upper computer is the monitoring software running on a PC or industrial computer; it is the **HMI (Human-Machine Interface)** we will build today. It is responsible for:
|
||||
|
||||
* displaying field data in real time (numbers, charts, animations)
|
||||
* recording historical data and alarm logs
|
||||
* enabling remote control for operators
|
||||
* providing data analysis and reports
|
||||
|
||||
You can think of the upper computer as the factory's "monitoring center." Operators can understand plant status from the screen.
|
||||
|
||||
**How do they communicate?**
|
||||
|
||||
Upper and lower computers exchange data through **industrial communication protocols**. The most common one is **Modbus**, a "veteran" protocol born in 1979. It is still widely used because it is simple, reliable, and supported by almost all industrial devices.
|
||||
|
||||
```text
|
||||
Control room Factory site
|
||||
┌──────────┐ Modbus protocol ┌──────────┐
|
||||
│ Upper │ ◄──────────────────► │ Lower │
|
||||
│ computer │ "Tell me pressure" │ computer │
|
||||
│ (Qt HMI) │ "Pressure is 1.20MPa"│ (PLC/Sensor)
|
||||
│ Display │ │ Read data│
|
||||
│ Log data │ │ Control │
|
||||
│ Alarms │ │ Protect │
|
||||
└──────────┘ └──────────┘
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.2 What Is Modbus Protocol?
|
||||
|
||||
Modbus is the "common language" of industrial communication. It defines how upper and lower computers "talk."
|
||||
|
||||
**Only two core concepts:**
|
||||
|
||||
* **Register**: data "cells" in the lower computer. Each has an address (`0`, `1`, `2`, ...), storing a number. For example, address `0` stores pressure and address `1` stores temperature.
|
||||
* **Read/Write operations**: upper computer can read registers (get data) or write registers (send control commands).
|
||||
|
||||
**Two common Modbus variants:**
|
||||
|
||||
| Variant | Transport | Typical Scenario |
|
||||
|------|---------|---------|
|
||||
| Modbus RTU | Serial (RS-485/RS-232) | Short distance, direct device connection |
|
||||
| Modbus TCP | Ethernet (TCP/IP) | Long distance, network communication |
|
||||
|
||||
This tutorial uses **Modbus TCP**. Since it is network-based, the upper-computer app and the lower-computer simulator can run on the same machine with no physical wiring.
|
||||
|
||||
## 1.3 Why Choose Qt?
|
||||
|
||||
Qt is a top framework choice for industrial software. Many monitoring interfaces in factories, hospitals, and transportation systems are built with Qt. The reasons are simple:
|
||||
|
||||
| Advantage | Explanation |
|
||||
|------|------|
|
||||
| Cross-platform | One codebase compiles to Windows, Linux, and embedded devices |
|
||||
| Built-in industrial protocol support | Qt Serial Bus supports Modbus natively, no third-party library required |
|
||||
| Powerful charting | Qt Charts provides professional real-time charts |
|
||||
| High performance | C++ foundation suitable for real-time data refresh |
|
||||
| Mature and stable | 30-year history, proven in industrial domain |
|
||||
|
||||
## 1.4 What Are We Building?
|
||||
|
||||
We will build a **Pump Monitoring HMI System** simulating real factory pump pressure monitoring:
|
||||
|
||||
| Function | Description |
|
||||
|------|------|
|
||||
| Real-time data reading | Read pressure from lower computer every second |
|
||||
| Pressure trend chart | Line chart for last 60 seconds of pressure |
|
||||
| Over-threshold alarm | Popup warning and red UI when pressure exceeds threshold |
|
||||
| Fault log | Record all alarm events in database for history queries |
|
||||
| Manual control | One-click start/stop pump (write lower-computer register) |
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.5 Tutorial Roadmap
|
||||
|
||||
We will complete the flow in these steps:
|
||||
|
||||
1. **Prepare environment and simulated lower computer** (2 minutes): install Qt 6.5 and Modbus Slave simulator
|
||||
2. **Create Qt project and connect Modbus** (3 minutes): establish communication between upper app and simulator
|
||||
3. **Implement real-time read and display** (3 minutes): timed pressure reads and UI updates
|
||||
4. **Draw real-time pressure trend chart** (3 minutes): dynamic line chart with Qt Charts
|
||||
5. **Implement alarm and fault logs** (3 minutes): over-threshold alarm + SQLite logging
|
||||
6. **Package and deploy** (optional): package app into standalone executable
|
||||
|
||||
# Chapter 2: Prepare Environment and Simulated Lower Computer (2 Minutes)
|
||||
|
||||
## 2.1 Install Qt 6.5
|
||||
|
||||
Qt provides a free open-source version, enough for this tutorial.
|
||||
|
||||
1. Visit [Qt official site](https://www.qt.io/download-qt-installer) and download Qt Online Installer
|
||||
2. Run installer, log in or register Qt account (free)
|
||||
3. In component selection, check:
|
||||
- **Qt 6.5.x** (or newer)
|
||||
- **Qt Serial Bus** under **Additional Libraries** (Modbus support)
|
||||
- **Qt Charts** under **Additional Libraries** (chart rendering)
|
||||
- **Qt Creator** (IDE, usually selected by default)
|
||||
4. Click install and wait
|
||||
|
||||
> **Tip**: if Qt is already installed but missing Serial Bus or Charts, rerun Qt Maintenance Tool and add components.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 2.2 Install Modbus Slave: Your "Virtual Pump"
|
||||
|
||||
Modbus Slave is a free Modbus slave simulator. It can simulate an industrial device (PLC/sensor) on your computer so your upper app has something to communicate with.
|
||||
|
||||
1. Visit [modbustools.com](https://www.modbustools.com/modbus_slave.html) and download Modbus Slave
|
||||
2. Install and open it
|
||||
3. Configure connection:
|
||||
- Menu **Connection -> Connect**
|
||||
- Choose **Modbus TCP/IP**
|
||||
- IP address: `127.0.0.1` (localhost)
|
||||
- Port: `502` (default Modbus TCP port)
|
||||
- Click **OK** to listen
|
||||
|
||||
4. Set simulated data:
|
||||
- You will see a register table, each row is a register address (`0`, `1`, `2`, ...)
|
||||
- Double-click the value at address **0** and change it to **120** (represents a pressure of 1.20 MPa; the app divides the raw register value by 100)
|
||||
- Double-click the value at address **1** and change it to **350** (represents a temperature of 35.0°C; the app divides the raw register value by 10)
|
||||
- Double-click value at address **2**, change to **1** (pump state: `1=running`, `0=stopped`)
|
||||
|
||||
Now Modbus Slave is your "24/7 virtual pump." Keep the window open; it will continuously respond to read/write requests.
|
||||
|
||||
<!--  -->
|
||||
|
||||
> **Dynamic simulation tip**: Modbus Slave supports auto increment/random changes. Right-click register value and choose "Auto increment" or "Random" to simulate realistic sensor fluctuations.
|
||||
|
||||
# Chapter 3: Create Qt Project and Connect Modbus (3 Minutes)
|
||||
|
||||
## 3.1 Create New Qt Project
|
||||
|
||||
Open Qt Creator and create a new project:
|
||||
|
||||
1. Click **File -> New Project**
|
||||
2. Choose **Application (Qt) -> Qt Widgets Application**
|
||||
3. Project name: **PumpHMI**
|
||||
4. Select installed Qt 6.5 kit
|
||||
5. Finish creation
|
||||
|
||||
Open `PumpHMI.pro` (or `CMakeLists.txt` if using CMake), and add key modules:
|
||||
|
||||
```pro
|
||||
QT += core gui widgets serialbus charts sql
|
||||
```
|
||||
|
||||
| Module | Purpose |
|
||||
|------|------|
|
||||
| `serialbus` | Provides `QModbusTcpClient` for Modbus TCP communication |
|
||||
| `charts` | Provides `QChart`, `QLineSeries` for real-time trend chart |
|
||||
| `sql` | Provides `QSqlDatabase` for SQLite fault logs |
|
||||
|
||||
If using CMake, equivalent config:
|
||||
|
||||
```cmake
|
||||
find_package(Qt6 REQUIRED COMPONENTS Widgets SerialBus Charts Sql)
|
||||
target_link_libraries(PumpHMI PRIVATE
|
||||
Qt6::Widgets Qt6::SerialBus Qt6::Charts Qt6::Sql)
|
||||
```
|
||||
|
||||
## 3.2 Declare Core Members
|
||||
|
||||
Ask AI to generate header file:
|
||||
|
||||
```text
|
||||
Please help me write mainwindow.h with core members for pump monitoring HMI:
|
||||
1. QModbusTcpClient for Modbus TCP communication
|
||||
2. QTimer for timed data reading
|
||||
3. QChart + QLineSeries for real-time trend chart
|
||||
4. QSqlDatabase for fault log storage
|
||||
5. UI elements: pressure label, status indicator, start/stop button, log table
|
||||
```
|
||||
|
||||
Core header:
|
||||
|
||||
```cpp
|
||||
// mainwindow.h
|
||||
#ifndef MAINWINDOW_H
|
||||
#define MAINWINDOW_H
|
||||
|
||||
#include <QMainWindow>
|
||||
#include <QModbusTcpClient>
|
||||
#include <QModbusDataUnit>
|
||||
#include <QTimer>
|
||||
#include <QtCharts>
|
||||
#include <QSqlDatabase>
|
||||
#include <QLabel>
|
||||
#include <QPushButton>
|
||||
#include <QTableWidget>
|
||||
|
||||
class MainWindow : public QMainWindow {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
explicit MainWindow(QWidget *parent = nullptr);
|
||||
~MainWindow();
|
||||
|
||||
private slots:
|
||||
void connectModbus(); // connect lower computer
|
||||
void readPressure(); // timed pressure read
|
||||
void onReadReady(); // read callback
|
||||
void triggerAlarm(float v); // trigger alarm
|
||||
void togglePump(); // start/stop pump
|
||||
|
||||
private:
|
||||
// Modbus communication
|
||||
QModbusTcpClient *m_modbusClient = nullptr;
|
||||
QTimer *m_pollTimer = nullptr;
|
||||
|
||||
// Real-time chart
|
||||
QChart *m_chart = nullptr;
|
||||
QLineSeries *m_series = nullptr;
|
||||
QDateTimeAxis *m_axisX = nullptr;
|
||||
QValueAxis *m_axisY = nullptr;
|
||||
|
||||
// Database
|
||||
QSqlDatabase m_db;
|
||||
|
||||
// UI elements
|
||||
QLabel *m_pressureLabel = nullptr; // pressure display
|
||||
QLabel *m_statusLight = nullptr; // status indicator
|
||||
QPushButton *m_pumpButton = nullptr; // start/stop button
|
||||
QTableWidget *m_logTable = nullptr; // log table
|
||||
|
||||
// Alarm threshold
|
||||
float m_alarmThreshold = 1.50f; // alarm above 1.50 MPa
|
||||
bool m_pumpRunning = false;
|
||||
|
||||
void setupUI();
|
||||
void setupDatabase();
|
||||
void logAlarm(float pressure, const QString &message);
|
||||
};
|
||||
|
||||
#endif // MAINWINDOW_H
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 3.3 Build Modbus TCP Connection
|
||||
|
||||
Implement connection logic in `mainwindow.cpp`:
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - connection section
|
||||
void MainWindow::connectModbus()
|
||||
{
|
||||
m_modbusClient = new QModbusTcpClient(this);
|
||||
|
||||
// Connect to Modbus Slave simulator
|
||||
m_modbusClient->setConnectionParameter(
|
||||
QModbusDevice::NetworkPortParameter, 502);
|
||||
m_modbusClient->setConnectionParameter(
|
||||
QModbusDevice::NetworkAddressParameter, "127.0.0.1");
|
||||
m_modbusClient->setTimeout(1000); // 1s timeout
|
||||
m_modbusClient->setNumberOfRetries(3); // retry 3 times
|
||||
|
||||
if (!m_modbusClient->connectDevice()) {
|
||||
statusBar()->showMessage("Failed to connect lower computer!", 3000);
|
||||
return;
|
||||
}
|
||||
|
||||
statusBar()->showMessage("Connected to lower computer (127.0.0.1:502)", 3000);
|
||||
|
||||
// Start timer, read once per second
|
||||
m_pollTimer = new QTimer(this);
|
||||
connect(m_pollTimer, &QTimer::timeout, this, &MainWindow::readPressure);
|
||||
m_pollTimer->start(1000); // 1000ms = 1s
|
||||
}
|
||||
```
|
||||
|
||||
**Code notes:**
|
||||
|
||||
| Code | Meaning |
|
||||
|------|------|
|
||||
| `QModbusTcpClient` | Built-in Qt Modbus TCP client, communicates with lower computer |
|
||||
| `NetworkPortParameter, 502` | Connect to port `502` (same as Modbus Slave config) |
|
||||
| `NetworkAddressParameter, "127.0.0.1"` | Connect localhost (simulator runs locally) |
|
||||
| `m_pollTimer->start(1000)` | Call `readPressure()` every second |
|
||||
|
||||
## 3.4 Read Pressure Data
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - reading section
|
||||
void MainWindow::readPressure()
|
||||
{
|
||||
if (!m_modbusClient || m_modbusClient->state() != QModbusDevice::ConnectedState)
|
||||
return;
|
||||
|
||||
// Build read request: start at address 0, read 3 holding registers
|
||||
QModbusDataUnit readUnit(
|
||||
QModbusDataUnit::HoldingRegisters, // register type
|
||||
0, // start address
|
||||
3 // quantity
|
||||
);
|
||||
|
||||
// Send async read request
|
||||
if (auto *reply = m_modbusClient->sendReadRequest(readUnit, 1)) {
|
||||
if (!reply->isFinished()) {
|
||||
connect(reply, &QModbusReply::finished,
|
||||
this, &MainWindow::onReadReady);
|
||||
} else {
|
||||
delete reply; // broadcast request, delete directly
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MainWindow::onReadReady()
|
||||
{
|
||||
auto *reply = qobject_cast<QModbusReply *>(sender());
|
||||
if (!reply) return;
|
||||
|
||||
if (reply->error() == QModbusDevice::NoError) {
|
||||
const QModbusDataUnit unit = reply->result();
|
||||
|
||||
// Parse values (divide register value for real units)
|
||||
float pressure = unit.value(0) / 100.0f; // addr 0: pressure (MPa)
|
||||
float temperature = unit.value(1) / 10.0f; // addr 1: temperature (°C)
|
||||
int pumpStatus = unit.value(2); // addr 2: pump state
|
||||
|
||||
// Update UI
|
||||
m_pressureLabel->setText(
|
||||
QString("%1 MPa").arg(pressure, 0, 'f', 2));
|
||||
|
||||
// Check alarm
|
||||
if (pressure > m_alarmThreshold) {
|
||||
triggerAlarm(pressure);
|
||||
}
|
||||
|
||||
// Update trend chart (implemented next chapter)
|
||||
// updateChart(pressure);
|
||||
|
||||
} else {
|
||||
statusBar()->showMessage(
|
||||
QString("Read failed: %1").arg(reply->errorString()), 2000);
|
||||
}
|
||||
|
||||
reply->deleteLater();
|
||||
}
|
||||
```
|
||||
|
||||
**Modbus reading flow:**
|
||||
|
||||
```text
|
||||
readPressure() triggered by timer
|
||||
-> Build QModbusDataUnit ("read addresses 0-2")
|
||||
-> sendReadRequest() async send (UI not blocked)
|
||||
-> lower computer returns data
|
||||
-> onReadReady() triggered
|
||||
-> parse register values and update UI
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 4: Draw Real-time Pressure Trend (3 Minutes)
|
||||
|
||||
## 4.1 Initialize Chart
|
||||
|
||||
Qt Charts provides professional chart components. Ask the AI to initialize the chart in the constructor:
|
||||
|
||||
```text
|
||||
Please help me initialize Qt Charts real-time line chart in MainWindow constructor:
|
||||
1. Create QChart and QLineSeries
|
||||
2. X axis uses QDateTimeAxis, showing latest 60 seconds
|
||||
3. Y axis uses QValueAxis, range 0-3.0 MPa
|
||||
4. Line color blue, width 2px
|
||||
5. Place chart into QChartView and add to layout
|
||||
```
|
||||
|
||||
Core code:
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - chart initialization
|
||||
void MainWindow::setupChart()
|
||||
{
|
||||
m_series = new QLineSeries();
|
||||
m_series->setName("Pressure (MPa)");
|
||||
m_series->setPen(QPen(QColor("#2196F3"), 2));
|
||||
|
||||
m_chart = new QChart();
|
||||
m_chart->addSeries(m_series);
|
||||
m_chart->setTitle("Real-time Pressure Trend");
|
||||
m_chart->setAnimationOptions(QChart::NoAnimation); // no animation for real-time data
|
||||
|
||||
// X axis: time
|
||||
m_axisX = new QDateTimeAxis();
|
||||
m_axisX->setFormat("HH:mm:ss");
|
||||
m_axisX->setTitleText("Time");
|
||||
m_chart->addAxis(m_axisX, Qt::AlignBottom);
|
||||
m_series->attachAxis(m_axisX);
|
||||
|
||||
// Y axis: pressure
|
||||
m_axisY = new QValueAxis();
|
||||
m_axisY->setRange(0, 3.0);
|
||||
m_axisY->setTitleText("Pressure (MPa)");
|
||||
m_axisY->setLabelFormat("%.1f");
|
||||
m_chart->addAxis(m_axisY, Qt::AlignLeft);
|
||||
m_series->attachAxis(m_axisY);
|
||||
|
||||
// Create chart view
|
||||
QChartView *chartView = new QChartView(m_chart);
|
||||
chartView->setRenderHint(QPainter::Antialiasing);
|
||||
|
||||
// Add to layout (assuming existing centralLayout)
|
||||
centralLayout->addWidget(chartView);
|
||||
}
|
||||
```
|
||||
|
||||
## 4.2 Update Chart in Real Time
|
||||
|
||||
Whenever a new pressure value is read, append one point and keep only the latest 60 seconds of data:
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - chart updates
|
||||
void MainWindow::updateChart(float pressure)
|
||||
{
|
||||
QDateTime now = QDateTime::currentDateTime();
|
||||
|
||||
// Append new point
|
||||
m_series->append(now.toMSecsSinceEpoch(), pressure);
|
||||
|
||||
// Keep only latest 60s data
|
||||
QDateTime cutoff = now.addSecs(-60);
|
||||
while (m_series->count() > 0 &&
|
||||
m_series->at(0).x() < cutoff.toMSecsSinceEpoch()) {
|
||||
m_series->remove(0);
|
||||
}
|
||||
|
||||
// Update X axis range: always show latest 60s
|
||||
m_axisX->setRange(cutoff, now);
|
||||
}
|
||||
```
|
||||
|
||||
Then call it in `onReadReady()`:
|
||||
|
||||
```cpp
|
||||
// Add after pressure parsing in onReadReady():
|
||||
updateChart(pressure);
|
||||
```
|
||||
|
||||
Now run the program. You will see a blue line updating in real time, one point per second, always showing the latest 60 seconds. If you manually modify register values in Modbus Slave, the line reflects the changes on the next poll.
|
||||
|
||||
<!--  -->
|
||||
|
||||
> **Performance tip**: `QChart::NoAnimation` is important. Real-time data refresh every second; animations can cause UI lag. This is a common industrial HMI practice.
|
||||
|
||||
# Chapter 5: Alarm System and Fault Logs (3 Minutes)
|
||||
|
||||
## 5.1 Over-threshold Alarm
|
||||
|
||||
When the pressure exceeds the threshold, we need three things: a red UI warning, a popup alert, and a log record.
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - alarm logic
|
||||
void MainWindow::triggerAlarm(float pressure)
|
||||
{
|
||||
// Turn UI red
|
||||
m_pressureLabel->setStyleSheet(
|
||||
"color: white; background-color: #F44336;"
|
||||
"font-size: 32px; padding: 10px; border-radius: 8px;");
|
||||
|
||||
// Status indicator red
|
||||
m_statusLight->setStyleSheet(
|
||||
"background-color: #F44336; border-radius: 12px;"
|
||||
"min-width: 24px; min-height: 24px;");
|
||||
|
||||
// Popup alarm (only first time crossing threshold to avoid repeated popups)
|
||||
static bool alarmActive = false;
|
||||
if (!alarmActive) {
|
||||
alarmActive = true;
|
||||
QMessageBox::warning(this, "Pressure Alarm",
|
||||
QString("Current pressure %1 MPa exceeds threshold %2 MPa!\nPlease check pump status immediately.")
|
||||
.arg(pressure, 0, 'f', 2)
|
||||
.arg(m_alarmThreshold, 0, 'f', 2));
|
||||
}
|
||||
|
||||
// Record to DB
|
||||
logAlarm(pressure,
|
||||
QString("Pressure over threshold: %1 MPa > %2 MPa")
|
||||
.arg(pressure, 0, 'f', 2)
|
||||
.arg(m_alarmThreshold, 0, 'f', 2));
|
||||
|
||||
// Reset when pressure returns to normal
|
||||
if (pressure <= m_alarmThreshold) {
|
||||
alarmActive = false;
|
||||
m_pressureLabel->setStyleSheet(
|
||||
"color: #2196F3; font-size: 32px; padding: 10px;");
|
||||
m_statusLight->setStyleSheet(
|
||||
"background-color: #4CAF50; border-radius: 12px;"
|
||||
"min-width: 24px; min-height: 24px;");
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 5.2 SQLite Fault Logs
|
||||
|
||||
Industrial systems must log all alarm events for traceability. We use SQLite:
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - database initialization
|
||||
void MainWindow::setupDatabase()
|
||||
{
|
||||
m_db = QSqlDatabase::addDatabase("QSQLITE");
|
||||
m_db.setDatabaseName("pump_alarm_log.db");
|
||||
|
||||
if (!m_db.open()) {
|
||||
qWarning() << "Cannot open database:" << m_db.lastError().text();
|
||||
return;
|
||||
}
|
||||
|
||||
// Create alarm table
|
||||
QSqlQuery query;
|
||||
query.exec(
|
||||
"CREATE TABLE IF NOT EXISTS alarm_log ("
|
||||
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
|
||||
" timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,"
|
||||
" pressure REAL,"
|
||||
" message TEXT"
|
||||
")"
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
## 5.3 Log and Display Records
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - write logs
|
||||
void MainWindow::logAlarm(float pressure, const QString &message)
|
||||
{
|
||||
// Write to DB
|
||||
QSqlQuery query;
|
||||
query.prepare(
|
||||
"INSERT INTO alarm_log (pressure, message) VALUES (?, ?)");
|
||||
query.addBindValue(pressure);
|
||||
query.addBindValue(message);
|
||||
query.exec();
|
||||
|
||||
// Update on-screen table
|
||||
int row = m_logTable->rowCount();
|
||||
m_logTable->insertRow(row);
|
||||
m_logTable->setItem(row, 0,
|
||||
new QTableWidgetItem(
|
||||
QDateTime::currentDateTime().toString("yyyy-MM-dd HH:mm:ss")));
|
||||
m_logTable->setItem(row, 1,
|
||||
new QTableWidgetItem(QString::number(pressure, 'f', 2)));
|
||||
m_logTable->setItem(row, 2,
|
||||
new QTableWidgetItem(message));
|
||||
|
||||
// Auto-scroll to latest row
|
||||
m_logTable->scrollToBottom();
|
||||
}
|
||||
```
|
||||
|
||||
The log table has three columns: time, pressure value, and alarm message. Each alarm appends one row to the table and is persisted to SQLite.
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 5.4 Manually Start/Stop Pump
|
||||
|
||||
Besides reading data, the upper computer should also be able to control the lower computer. We do this by writing register values:
|
||||
|
||||
```cpp
|
||||
// mainwindow.cpp - pump control
|
||||
void MainWindow::togglePump()
|
||||
{
|
||||
if (!m_modbusClient || m_modbusClient->state() != QModbusDevice::ConnectedState)
|
||||
return;
|
||||
|
||||
m_pumpRunning = !m_pumpRunning;
|
||||
|
||||
// Build write request: write 1 (start) or 0 (stop) to address 2
|
||||
QModbusDataUnit writeUnit(
|
||||
QModbusDataUnit::HoldingRegisters, 2, 1);
|
||||
writeUnit.setValue(0, m_pumpRunning ? 1 : 0);
|
||||
|
||||
if (auto *reply = m_modbusClient->sendWriteRequest(writeUnit, 1)) {
|
||||
connect(reply, &QModbusReply::finished, this, [this, reply]() {
|
||||
if (reply->error() == QModbusDevice::NoError) {
|
||||
m_pumpButton->setText(m_pumpRunning ? "Stop Pump" : "Start Pump");
|
||||
m_pumpButton->setStyleSheet(m_pumpRunning
|
||||
? "background-color: #F44336; color: white; padding: 12px;"
|
||||
: "background-color: #4CAF50; color: white; padding: 12px;");
|
||||
statusBar()->showMessage(
|
||||
m_pumpRunning ? "Pump started" : "Pump stopped", 2000);
|
||||
}
|
||||
reply->deleteLater();
|
||||
});
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In Modbus Slave, you will see address `2` switching between `0` and `1` as you click the button. This is the upper-computer "control" process.
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 6: Packaging and Deployment (Optional)
|
||||
|
||||
## 6.1 Package with windeployqt / macdeployqt
|
||||
|
||||
Qt provides official deployment tools to collect required dynamic libraries automatically.
|
||||
|
||||
**Windows:**
|
||||
|
||||
```bash
|
||||
# Build Release first, then run in build directory:
|
||||
windeployqt PumpHMI.exe
|
||||
```
|
||||
|
||||
`windeployqt` copies the required Qt DLLs, plugins, translation files, etc. next to the executable. The resulting folder can then be distributed as-is.
|
||||
|
||||
**macOS:**
|
||||
|
||||
```bash
|
||||
macdeployqt PumpHMI.app -dmg
|
||||
```
|
||||
|
||||
This generates a `.dmg` installer image.
|
||||
|
||||
## 6.2 Build Installer with Qt Installer Framework
|
||||
|
||||
If you want a professional setup wizard ("Next -> Next -> Finish"), use Qt Installer Framework:
|
||||
|
||||
```text
|
||||
Please help me create an installer for PumpHMI with Qt Installer Framework:
|
||||
1. Create installer directory structure (config, packages)
|
||||
2. Configure config.xml (installer name, version, target directory)
|
||||
3. Put windeployqt output files into packages/com.example.pumphmi/data/
|
||||
4. Run binarycreator to generate installer
|
||||
```
|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 7: Final Notes
|
||||
|
||||
Congratulations! You have built an industrial-grade pump monitoring HMI system from scratch. Recap:
|
||||
|
||||
1. Understood core concepts of upper computer, lower computer, and Modbus protocol
|
||||
2. Simulated a "virtual pump" with Modbus Slave, with no real hardware
|
||||
3. Built upper-lower communication using Qt `QModbusTcpClient`
|
||||
4. Drew real-time rolling pressure trend chart with Qt Charts
|
||||
5. Implemented over-threshold popup alarms and SQLite fault logs
|
||||
6. Implemented remote start/stop pump control
|
||||
|
||||
The whole process used no real industrial hardware, but the architecture and functions match real factory HMI systems. If you replace Modbus Slave with a real PLC, this app can be used in production scenarios directly.
|
||||
|
||||
**Advanced directions:**
|
||||
|
||||
* **Multi-device monitoring**: connect multiple lower computers and use tabs/split views for different device data
|
||||
* **Historical playback**: read historical data from SQLite and replay trend charts with timeline controls
|
||||
* **OPC UA protocol**: Modbus fits simpler scenarios; complex industrial systems often use OPC UA, also supported by Qt (Qt OPC UA module)
|
||||
* **Web remote monitoring**: use Qt WebSocket to push real-time data to browser for mobile viewing
|
||||
* **AI predictive maintenance**: feed historical pressure data to ML models to predict failures in advance
|
||||
|
||||
***Use code to protect every device in industrial operations.***
|
||||
|
||||
# References
|
||||
|
||||
* [Qt Serial Bus Docs](https://doc.qt.io/qt-6/qtserialbus-index.html)
|
||||
* [Qt Modbus TCP Client Example](https://doc.qt.io/qt-6/qtserialbus-modbus-client-example.html)
|
||||
* [Qt Charts Docs](https://doc.qt.io/qt-6/qtcharts-index.html)
|
||||
* [Modbus Protocol Specs](https://modbus.org/specs.php)
|
||||
* [Modbus Slave Simulator](https://www.modbustools.com/modbus_slave.html)
|
||||
* [Qt Installer Framework Docs](https://doc.qt.io/qtinstallerframework/)
|
||||
@@ -0,0 +1,470 @@
|
||||
# How to Build the Simplest WeChat Mini Program
|
||||
|
||||
# 1. What WeChat Mini Programs and Mini Program Development Are
|
||||
|
||||
In this tutorial, we will complete a full closed loop: from an idea in your mind to a real mini program that can be searched and opened by QR code inside WeChat.
|
||||
|
||||
Before we start building, we need to establish two basic understandings.
|
||||
|
||||
The first is **essence**: what exactly is a WeChat mini program? How is it different from a normal app or website? Why do so many products choose this format? Only when you understand the core logic can you judge whether your idea fits a mini program.
|
||||
|
||||
The second is **path**: when you say "I want to build a mini program," what does the full path from zero to launch look like? What are the key nodes on that path - what to think about during ideation, how to set up environment, how AI-assisted development improves efficiency, what pitfalls appear in simulator debugging, and what test accounts vs formal release each solve. If you run through this process mentally first, you will not get lost during implementation.
|
||||
|
||||
After these two questions are clear, we can formally enter development. Let us start with the first question: what exactly is a WeChat mini program?
|
||||
|
||||
## 1.1 WeChat Mini Program
|
||||
|
||||
A WeChat mini program can be seen as an app living inside WeChat. You do not need to search in an app store, download, or install. Users can search by name in WeChat, scan a QR code, or open a shared card and use it immediately. After use, they just close it. It does not permanently occupy phone home screen or storage.
|
||||
|
||||
For regular users, mini programs solve many "small tasks": checking delivery, ordering coffee, viewing orders, playing a quick game. Fast startup and unified entry inside WeChat are its biggest experience traits.
|
||||
|
||||
For companies and developers, mini programs are a searchable and shareable "small app format." As long as you register on WeChat Official Platform, complete settings, and pass review, your mini program can open to all WeChat users. Compared with traditional apps, it is easier to get the first batch of users because people are already used to doing many tasks in WeChat.
|
||||
|
||||
In this tutorial, we will not build a complex business system. We choose a classic example: Snake game. It is small and logically clear, yet includes the complete elements a mini program should have: multiple pages, simple interactions, state changes, score recording, etc. It is perfect as your first project.
|
||||
|
||||
## 1.2 WeChat Mini Program Development
|
||||
|
||||
After understanding "what mini programs are," the next question is: what does developing one actually involve?
|
||||
|
||||
You need a clear goal (for example, a Snake game users can play anytime), design the interface users will see, define what should happen under different actions, and finally publish it.
|
||||
|
||||
In traditional development, programmers usually lead all these steps and write a lot of code. In AI-assisted development, this can be split more clearly: you explain what you want, and AI helps with most implementation details. That means for beginners, the most important skill is no longer memorizing syntax, but clearly describing requirements and understanding AI output.
|
||||
|
||||
## 1.3 Several Ways to Develop WeChat Mini Programs
|
||||
|
||||
In real projects, people use different technical routes. To avoid overwhelming you with terms at the beginning, we will only do a rough classification so you understand the common paths.
|
||||
|
||||
The first way is using official native capabilities directly. After creating a project in WeChat DevTools, you will see a fixed set of file types used to describe page structure, styles, and logic. This way stays close to official docs and gives strong control, but for first-time frontend learners, the learning curve is a bit steeper.
|
||||
|
||||
The second way is using cross-end frameworks, such as uni-app. You mainly write web-like code locally (for example `.vue` files), and the framework converts this code to formats WeChat mini programs can run. The advantage is unified structure. If you later publish to other platforms (such as H5 or App), changes are relatively smaller.
|
||||
|
||||
The third way, which this tutorial focuses on, builds on these two methods: a mini program SOP using AI-assisted tools. For example, you open the whole project in Trae and tell the built-in AI directly: "Please add a homepage with title and button in this file" or "Please create a game page that shows snake and score." The AI will generate new code snippets or modify/refactor existing code based on the current project context.
|
||||
|
||||
These three ways are not mutually exclusive. You can absolutely build in a uni-app project while using Trae AI for most coding work. The key is not picking one method, but knowing where you are now and what tools are available.
|
||||
|
||||
## 1.4 WeChat Mini Program Steps Covered in This Article (High-level Preview)
|
||||
|
||||
This tutorial follows a rhythm from **environment to final product**. Around the Snake example and Trae vibecoding style, we split the process into a reusable route. In later chapters, you will go through these stages:
|
||||
|
||||
1. Build conceptual foundation: understand what mini programs are, what common development methods exist, and who this Snake mini program is for and in what scenarios it is used.
|
||||
2. Prepare environment: register mini program account, install HBuilderX, Trae, and WeChat DevTools, then create a basic project skeleton with HBuilderX that can run in WeChat DevTools and show the simplest page first.
|
||||
3. Enter formal development: open project in Trae, use vibecoding dialog with AI to generate homepage and game page layout step by step, and implement core gameplay such as snake movement, eating food, and game over.
|
||||
4. After core features run, learn to use AI as a "debugging and refactoring partner": ask it to diagnose bugs, tidy structure when code gets messy, and gradually add details such as start/pause, high-score record, and UI polishing.
|
||||
5. Enter publishing: build project into WeChat-recognizable version, preview and test on real devices in WeChat DevTools, launch first with test account and experience version for process validation, then complete filing and review before formal release so others can search and play your mini program.
|
||||
|
||||
This section only draws the full map and does not expand commands or code details yet. For now, remember these 5 steps: **Understand -> Setup environment -> Vibecoding development -> Debug and polish -> Build and release**. Later chapters will zoom into each step, showing what to prepare, what to say to AI, and what results you should see on screen at each stage.
|
||||
|
||||
# 2. Environment Preparation
|
||||
|
||||
Before writing any line of code, let us prepare the environment first.
|
||||
The goal of this part is to make sure you no longer get stuck on **where to download tools and why things cannot run**, so you can focus directly on AI dialog and requirement implementation.
|
||||
|
||||
If you can open a browser, download files, and double-click installers, you can complete this section.
|
||||
|
||||
## 2.1 Three Tools Used in This Tutorial
|
||||
|
||||
For Snake mini program development, we use three tools together, each with different responsibilities:
|
||||
|
||||
1. The first is Trae. Think of it as an AI-integrated code editor. It can open project files like a normal IDE and also let you chat with AI in natural language to generate, modify, and explain code. Most "build mini program with AI" operations in this tutorial happen in Trae. Download latest version from https://www.trae.cn .
|
||||
2. The second is HBuilderX. It has strong support for Vue and uni-app, and offers ready-made mini program templates. We use it to "one-click generate" a base mini program project - this is laying the foundation before handing it to Trae + AI for further iteration. Download from https://www.dcloud.io/hbuilderx.html .
|
||||
3. The third is WeChat DevTools. This official tool is used to develop and preview mini programs. It runs your project on desktop and supports real-device debugging on mobile. Download from https://developers.weixin.qq.com/miniprogram/dev/devtools/download.html .
|
||||
|
||||
In short: HBuilderX creates base project quickly, Trae helps you code with AI, and WeChat DevTools shows the actual running mini program.
|
||||
|
||||
## 2.2 Register WeChat Official Platform Account and Get AppID
|
||||
|
||||
With tools ready, you still need a **mini program identity**, which is created on WeChat Official Platform.
|
||||
If you have never registered a mini program before, follow this order:
|
||||
|
||||
1. Enter https://mp.weixin.qq.com in your browser to open WeChat Official Platform, and log in by scanning the QR code with WeChat.
|
||||
|
||||

|
||||
|
||||
2. Choose "Mini Program" on homepage and complete registration prompts, including email, phone number, and entity type (individual or enterprise).
|
||||

|
||||
3. After successful registration, enter backend, find "Development Management" or "Development Settings," and you will see a unique ID named AppID. This is your mini program identity and will be used in project config later.
|
||||
|
||||

|
||||
|
||||
It is recommended to save the AppID somewhere easy to find. In later sections, we will enter this value in the project config to map the local project to your online mini program.
|
||||
|
||||
## 2.3 Install WeChat DevTools
|
||||
|
||||
Next we need a place to actually run and preview mini programs. That is exactly what WeChat DevTools is for.
|
||||
|
||||
1. Visit download page https://developers.weixin.qq.com/miniprogram/dev/devtools/download.html .
|
||||
On this page you will see versions for different operating systems. Usually choose the stable version matching your system, such as Windows 64-bit or macOS.
|
||||
2. After download, double-click installer and follow wizard step by step. If unsure, keep default options.
|
||||
3. After installation, launch WeChat DevTools from desktop or start menu. On first launch, it shows a QR code and asks you to scan with WeChat. Scan and authorize to enter main interface.
|
||||
|
||||

|
||||
|
||||
Later, after project files are ready in Trae, we will import the built mini program into WeChat DevTools and view real running results here.
|
||||
|
||||
## 2.4 Prepare Trae and HBuilderX
|
||||
|
||||
Finally, install the two tools used for actual coding: Trae and HBuilderX.
|
||||
|
||||
You can **install Trae first**. Visit https://www.trae.cn in browser and download the right version for your OS. Installation is like normal software: double-click installer and follow prompts. After install, you get an IDE that can open local folders, inspect code, and chat with AI. All later vibecoding steps happen here.
|
||||
|
||||

|
||||
|
||||
**Then install HBuilderX**. Visit https://www.dcloud.io/hbuilderx.html and download the package for your OS. HBuilderX is lightweight and starts quickly. After installing, you can take a brief look at the interface; there is no need to study its features in depth now. In later chapters, we use it to create a uni-app mini program template as the project starting point.
|
||||
|
||||

|
||||
|
||||
After finishing this section, your environment is complete: you have a mini program account + AppID, a runtime preview tool, and an AI coding IDE. Next we start from **creating the first project skeleton** and make these tools really run.
|
||||
|
||||
## 2.5 Prepare Base Files
|
||||
|
||||
1. In HBuilderX, click "New Project".
|
||||
|
||||

|
||||
|
||||
2. Choose default template, set mini program name, select storage path, then click create in lower-right corner:
|
||||
|
||||

|
||||
|
||||
3. Creation success screen appears:
|
||||
|
||||

|
||||
|
||||
4. Then find this folder in file system, open it in Trae, and you will see foundation files are all ready:
|
||||
|
||||

|
||||
|
||||
# 3. Mini Program Development
|
||||
|
||||
In the first two parts, we already clarified "what mini programs are" and "how to set up tools and environment." From this section, we enter hands-on practice: not just concepts, but AI actually helping you build Snake mini program from zero.
|
||||
|
||||
In this section, you will walk through a complete SOP for the development phase, roughly including:
|
||||
|
||||
1. Open current project in Trae and give AI your first complete instruction so it designs and implements a runnable Snake version based on current skeleton.
|
||||
2. Let Trae modify real project files directly, not only output "example code," and learn to use rollback to restore previous state when needed.
|
||||
3. Return to HBuilderX and WeChat DevTools, run to mini program simulator, and play this version in simulator to switch from "code perspective" to "user perspective."
|
||||
4. Based on play results, keep proposing modifications in natural language and let AI iterate controls from button-based to joystick-based, while experiencing a full loop of "find issue -> describe issue -> AI fixes -> verify again."
|
||||
|
||||
You can choose to design every page and button before development.
|
||||
But for complete beginners, interface and interaction design itself is also a new domain (later we will show AI-assisted design). So in this round we intentionally use another way: start first - let AI generate a runnable version, then refine gradually by viewing effects and chatting in natural language.
|
||||
|
||||
## 3.1 Explain Requirements Clearly in One Shot: Give Trae the First "Master Prompt"
|
||||
|
||||
After opening prepared mini program project in Trae, I did not rush to edit a specific line. Instead, I told built-in AI assistant:
|
||||
|
||||
**I gave AI a command: based on current framework, build a Snake mini program. Please design this mini program and write me a prompt.**
|
||||
|
||||
In other words, I did not ask it to "write one function step by step." I first threw out a complete goal, let AI help plan, and AI not only planned but also directly landed the first implementation.
|
||||
|
||||
After receiving this instruction, Trae reads current project structure, determines where to add pages and where to add logic, and directly modifies project files/code. You do not need to hand-write code or manually create/modify folders.
|
||||
|
||||
## 3.2 Let AI Modify Real Code Automatically, Not Manual Coding
|
||||
|
||||
When you execute this instruction in Trae, AI enters a "project editing" flow. During this process, you can observe key points:
|
||||
|
||||
1. It explains its thinking in chat area, for example which directories it will add pages to and how it will organize game logic.
|
||||
|
||||

|
||||
|
||||
2. It directly edits real project files, instead of only giving "sample code" for copy-paste.
|
||||
3. After finishing, Trae outputs a short summary telling you what files were changed and what was done.
|
||||
|
||||
If you are not satisfied with this round (or think something is wrong), there is no need to panic. Trae provides a rollback control at the top-left, outside the chat box. With one click you can restore the project to its state before this instruction - like a safety undo key.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## 3.3 View Effects in HBuilderX and WeChat DevTools
|
||||
|
||||
After AI completes the first development round, code has been written into project, but you still have not seen real player-side effect.
|
||||
Next we need to run it.
|
||||
|
||||
Specific operation: go back to HBuilderX, find top menu "Run," select "Run to Mini Program Simulator" -> "WeChat DevTools." This triggers project build and opens result in WeChat DevTools.
|
||||
|
||||

|
||||
|
||||
The output panel at bottom shows build process. If final state is "ready" with no errors, build is successful. Then switch to WeChat DevTools to check UI and features of this version.
|
||||
|
||||

|
||||
|
||||
In most cases, HBuilderX auto-opens WeChat DevTools and you can directly see updated mini program. If not auto-opened, do this:
|
||||
|
||||
1. Stop current run in HBuilderX first.
|
||||
2. Launch WeChat DevTools manually and keep it open.
|
||||
3. Back in HBuilderX, click "Run -> Run to Mini Program Simulator -> WeChat DevTools" again.
|
||||
|
||||
Then you can see the vibecoding mini program in WeChat DevTools:
|
||||
|
||||

|
||||
|
||||
## 3.4 Use Natural Language to Repeatedly Adjust Until Satisfied
|
||||
|
||||
In this practice, AI initially generated a button-controlled Snake: four direction buttons on screen, with the snake changing direction when one is clicked. It is fully playable, but I personally prefer joystick control. For any adjustment you want - not only features, but also UI design and layout (once you are more experienced, you can even ask AI to integrate external model APIs or databases) - you again only need to describe the requirement in natural language.
|
||||
|
||||
This is the core advantage of vibecoding: you do not have to dig into code for event binding or coordinate logic. You directly tell AI what you want. For example, in Trae chat you can write:
|
||||
|
||||
Replace buttons with joystick control. When user releases joystick, snake should keep moving in current direction until next joystick action.
|
||||
|
||||
As long as requirement is clear, AI will automatically locate target files and modify control styles, interaction bindings, and direction handling logic.
|
||||
|
||||

|
||||
|
||||
After modification, return to WeChat DevTools to check.
|
||||
If changes are not visible immediately, click "Run" in DevTools or refresh preview window to apply latest build. If still not updated, stop run in HBuilderX and run to simulator again, then you can see updated mini program:
|
||||
|
||||

|
||||
|
||||
## 3.5 What If Problems Appear: Keep Communicating in Natural Language
|
||||
|
||||
AI-generated versions are not always perfect at first. You may encounter:
|
||||
|
||||
- runtime errors that prevent the app from opening;
|
||||
- features mostly correct, but details differ from your expectation;
|
||||
- UI usable but still not visually pleasing or convenient enough.
|
||||
|
||||
At these moments, no need to blindly edit code yourself. Describe problems directly to Trae AI assistant in natural language, for example:
|
||||
|
||||
"Joystick control works now, but snake sometimes suddenly stops. Please check current implementation."
|
||||
Or: "Game is playable now, but interface feels crowded. I want more vertical spacing on mobile. Please adjust layout."
|
||||
|
||||
AI will use current project context + your description, then provide and apply code changes directly. If result becomes worse or direction is wrong, you can still rollback to previous stable version and try another wording.
|
||||
|
||||
Through several such rounds, you can polish from "rough first version" to a joystick-based Snake closer to your preference.
|
||||
For example, I gave a style reference image and asked AI to adjust UI style accordingly:
|
||||
|
||||

|
||||
|
||||
## 3.6 Final Result and Section Summary
|
||||
|
||||
After repeated rounds of **natural language description -> AI modification -> preview in WeChat DevTools -> continue micro-adjustment**, I finally got this result:
|
||||
|
||||
- complete game page;
|
||||
- snake moves smoothly and eats food;
|
||||
- joystick control supported;
|
||||
- runs correctly in mini program simulator.
|
||||
|
||||
Final product examples:
|
||||
|
||||

|
||||
|
||||
In this section, you have seen a complete closed loop:
|
||||
|
||||
1. In Trae, one clear instruction let AI build first Snake mini program version;
|
||||
2. With HBuilderX + WeChat DevTools, validate real effect from user perspective;
|
||||
3. Keep proposing modifications in natural language, let AI handle feature and UI optimization;
|
||||
4. When issues appear, use rollback + rerun to keep process safe.
|
||||
|
||||
Next, you can use same rhythm for your own ideas: not limited to Snake, but also utility mini programs, event pages, or real business prototypes. Your main task is to think clearly and describe clearly. Let AI and tools handle the rest.
|
||||
|
||||
# 4. Mini Program Release
|
||||
|
||||
In the previous three chapters, we completed the full flow from **environment setup** -> **AI-assisted development** -> **running Snake in local simulator**.
|
||||
|
||||
From this chapter, the key question becomes: **how to really publish this work to WeChat, so it is not just a toy, but a usable mini program?**
|
||||
|
||||
To reduce difficulty, we first take the **shortest closed loop**: publish only as a **test/experience version** for yourself and a few teammates. After function and experience are stable, then proceed to formal public release.
|
||||
|
||||
This chapter first covers 4.1 to complete the shortest path for **experience-version launch**. Formal release for all users is explained in 4.2.
|
||||
|
||||
## 4.1 Shortest SOP - Launch as Experience Version
|
||||
|
||||
Goal of this subsection is only one thing: let you open your Snake mini program in WeChat as an **experience version**.
|
||||
|
||||
The whole flow is four tasks:
|
||||
|
||||
1. Find and confirm your AppID in WeChat Official Platform.
|
||||
2. Configure this AppID in your project.
|
||||
3. Upload current version in WeChat DevTools.
|
||||
4. Return to Official Platform and set this uploaded version as "Experience Version."
|
||||
|
||||
Let us go in this order.
|
||||
|
||||
### 4.1.1 Confirm AppID in WeChat Official Platform
|
||||
|
||||
First step: confirm your mini program AppID in WeChat Official Platform.
|
||||
|
||||
You already did this once in **Section 2 (Environment Preparation)**. Here we use it for real.
|
||||
|
||||
1. Visit `https://mp.weixin.qq.com` and log into your mini program backend.
|
||||
2. Find "Development Management" in left menu, then enter "Development Settings."
|
||||
3. At top, find "Developer ID" area. There is a line "AppID (Mini Program ID)" - this is your unique ID.
|
||||
|
||||
This ID must exactly match project config. Otherwise WeChat sees it as another app identity and preview/publish will fail.
|
||||
|
||||

|
||||
|
||||
### 4.1.2 Fill AppID in Project
|
||||
|
||||
Second step: write this AppID into project configuration so local build maps to your official mini program account.
|
||||
|
||||
If your project uses uni-app template, do this:
|
||||
|
||||
1. Open HBuilderX and load Snake project.
|
||||
2. Find `manifest.json` in file tree and open it.
|
||||
3. Scroll to "WeChat Mini Program Configuration," and you will see an input such as "WeChat Mini Program AppID."
|
||||
4. Paste AppID copied from Official Platform exactly, then save file.
|
||||

|
||||
|
||||
Now your local project has claimed this mini program identity. Next, when you upload from WeChat DevTools, it will be recorded under this AppID.
|
||||
|
||||
### 4.1.3 Upload a Version in WeChat DevTools
|
||||
|
||||
We have already run project into WeChat DevTools to preview simulator.
|
||||
|
||||
Now we do: "package current code as a version and upload to server."
|
||||
|
||||
Steps:
|
||||
|
||||
1. In top-right toolbar of WeChat DevTools, click "Upload."
|
||||
2. In popup, fill two key fields:
|
||||
1. Version number: for example `1.0.0` (digits and dots only).
|
||||
2. Project note: short description, such as "Completed core gameplay."
|
||||
3. Confirm and click "Upload." Output panel shows build process. If all steps turn green and upload completes, this version is successfully submitted to WeChat server.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
### 4.1.4 Set Uploaded Version as Experience Version in Backend
|
||||
|
||||
Upload only sends code to WeChat side. You still need to tell system "this is an experience version."
|
||||
|
||||
Final step: go back to Official Platform backend and complete loop.
|
||||
|
||||
1. Open `https://mp.weixin.qq.com` and enter mini program backend.
|
||||
2. In left menu, find "Management" -> "Version Management."
|
||||
3. In "Development Version" section, you should see the uploaded version: version `1.0.0`, your note, and just-uploaded timestamp.
|
||||
4. On the right side of this row, use dropdown/action button to choose "Set as Experience Version," confirm action. Before this step, ensure your main category is configured on homepage/category settings.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
After completion, this version becomes your mini program "Experience Version." You can generate experience QR code in backend, or add yourself/team as experience members, then scan in WeChat for real-device testing.
|
||||
|
||||
At this point, we have finished the shortest loop from local project to test launch:
|
||||
|
||||
You do not need to open to all WeChat users immediately. In a safe range, run real mini program in real WeChat environment first. That is enough for feature testing, feedback collection, and iteration.
|
||||
|
||||
## 4.2 Formal Launch of Mini Program
|
||||
|
||||
After experience version runs well, you can already play this Snake mini program in your own WeChat.
|
||||
Next step is moving from limited experience users to a fully public WeChat mini program.
|
||||
|
||||
Break this into steps: complete basic info, choose category, finish filing, then submit review. Follow this order:
|
||||
|
||||
### 4.2.1 Enter Mini Program Release Flow
|
||||
|
||||
First go back to WeChat Official Platform backend and log in.
|
||||
In left navigation find entries related to "Version Management / Release" (UI may vary slightly over time). You will find "Mini Program Release Flow."
|
||||
|
||||
After entering, top area shows a progress bar. Below it lists steps such as:
|
||||
|
||||
1. Mini Program Information
|
||||
2. Mini Program Category
|
||||
3. Operation Information / Filing
|
||||
4. WeChat Verification (depending on entity type)
|
||||
|
||||
At beginning progress is 0%. As each step is completed, system updates automatically.
|
||||
|
||||

|
||||
|
||||
### 4.2.2 Fill Basic Mini Program Information
|
||||
|
||||
First step is completing your mini program "business card," which is what users first see in WeChat.
|
||||
|
||||
On "Mini Program Information" page, you usually need to fill/confirm:
|
||||
|
||||
1. Mini program name
|
||||
This appears in search results and app header. It has length limits and naming rules. Choose a name that describes function and is easy to remember.
|
||||
2. Description / intro
|
||||
Use one or two sentences to explain what this mini program does, for example: "A Snake game developed with AI-assisted coding, suitable for quick casual play."
|
||||
Keep description consistent with real functionality and avoid exaggerated marketing text.
|
||||
3. Icon and screenshots
|
||||
1. Icon usually requires square image with PNG/JPG support and size/pixel limits (check page rules). Use simple, high-contrast icon.
|
||||
2. Upload several screenshots such as homepage, game page, settings page. They help users understand content.
|
||||
4. Other required fields
|
||||
Such as tags and service region, fill according to prompts.
|
||||
Only one principle: all information must match real functionality of your Snake mini program.
|
||||
|
||||

|
||||
|
||||
After all fields are done, click Save or Next. First step in release flow is complete.
|
||||
|
||||
### 4.2.3 Select Mini Program Service Category
|
||||
|
||||
After basic information, wizard guides you to "Mini Program Category."
|
||||
The category is your app's classification in WeChat; it affects the review route as well as later display and operation.
|
||||
|
||||

|
||||
|
||||
On this page you will see "Add Category." Click it and choose proper category in system category tree, for example:
|
||||
|
||||

|
||||
|
||||
1. Choose "Education" as top-level category;
|
||||
2. Then choose more specific subcategory such as "Education Tools / Teaching Assistant." In this example, education tools are selected as learning aid for vibecoding.
|
||||
|
||||
In your own project, simply choose the closest category by real use case.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
After confirming category, click Save. If page shows "category created successfully" and displays your new item, this step is complete.
|
||||
|
||||
### 4.2.4 Complete Filing Information
|
||||
|
||||
Next, release flow asks for "Operation Information / Filing." This verifies responsible entity behind mini program.
|
||||
|
||||

|
||||
|
||||
Taking an individual entity as an example, the flow usually includes:
|
||||
|
||||
1. Select filing type
|
||||
Choose among types such as "Individual" or "Enterprise," consistent with your registration entity.
|
||||
2. Fill entity information
|
||||
Include name, ID type, ID number, etc. This must match registration information, otherwise review may reject.
|
||||
3. Upload supporting documents
|
||||
Usually requires ID photos or other proof files, with specific format/size/clarity requirements shown on page. Prepare and upload clear files.
|
||||

|
||||
|
||||
After submission, system enters "under review" and shows a message like "Information submitted, please wait." This may take some time. You can check progress anytime in backend.
|
||||
|
||||

|
||||
|
||||
### 4.2.5 Submit for Review and Wait for Formal Release
|
||||
|
||||
When "Mini Program Information," "Category," and "Operation Information/Filing" are all completed, do final action: submit for review.
|
||||
|
||||
1. Return to release-flow overview page and confirm all items show completed, with progress close to 100%.
|
||||
2. Click "Submit for Review" (or similar button) to submit current development version to WeChat review team.
|
||||
3. In "Version Management," this version status becomes "Under Review." After approval it becomes "Published" or available for "Go Live."
|
||||
|
||||
If filing review fails, developers may receive a call specifying failed parts.
|
||||
|
||||
For filing, you may receive a verification code and a verification link from the Ministry of Industry and Information Technology. Open the link and fill in the code and your personal information (the verification is valid for 1 day). If the filing passes, you will receive an email and an SMS notice with the filing number.
|
||||
WeChat verification: individual usually pays 30 CNY, enterprise around 300 CNY. Fee is non-refundable regardless of approval result. You may receive verification notice and confirmation call.
|
||||
|
||||
When submitting review, upload operation video/screens and fill required info. Then click "Submit Release" for formal launch.
|
||||
|
||||

|
||||
|
||||
# 5. Summary
|
||||
|
||||
At this point, you have completed a full **0-to-1** mini program development loop: from understanding mini programs, to installing Trae, HBuilderX, and WeChat DevTools; from giving AI your idea and letting it "move bricks" in code, to playing the first Snake version in the simulator; then packaging it as an experience version, finishing filing/review, and making it truly usable in WeChat - you have personally run through the full chain once.
|
||||
|
||||
More importantly, you did not achieve this by memorizing syntax. You achieved it by clearly expressing requirements + communicating effectively with AI. You have already experienced this: **one natural-language instruction can let AI satisfy your development needs very effectively**. This capability is not limited to Snake. It can transfer to any mini program you want to build later - tools, event pages, educational apps, or real work projects.
|
||||
|
||||
If we summarize into a **general SOP**, it is only five steps:
|
||||
**Clarify one small requirement -> build project skeleton in Trae -> use vibecoding + AI to create first version -> repeatedly play-test and improve in WeChat DevTools -> upload, file, review, and launch.**
|
||||
Each time you repeat these five steps, you gain another real mini program that can be opened and shared, and another layer of confidence that "I can use AI to turn ideas into products."
|
||||
|
||||
Next, you can keep polishing this Snake app, or close it and start a blank project from your own idea. No matter what you build, remember one thing: you are no longer just someone who "wants to build something." You are already a vibecoding developer who has run the full workflow. The rest is repetition until this capability becomes habit.
|
||||
|
||||
# References:
|
||||
|
||||
- https://zhuanlan.zhihu.com/p/1889401120939567074
|
||||
- https://blog.csdn.net/2401_87407347/article/details/155193007
|
||||
@@ -0,0 +1,3 @@
|
||||
# Cross-Platform Development - How to Build a WeChat Mini Program (with Backend)
|
||||
|
||||
> This chapter is currently being written. Stay tuned...
|
||||
@@ -0,0 +1,679 @@
|
||||
# How to Build a Simple Android App - Native Compose Development
|
||||
|
||||
# 1 What Android Apps and Android Development Are
|
||||
|
||||
In this tutorial, we will complete a full closed loop: **from an idea in your mind to a real app that can be successfully installed and run on an Android phone.**
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
- A computer with decent performance (Windows or Mac)
|
||||
- An Android phone (optional; if you do not have one, we will use an emulator)
|
||||
- Android Studio installed (for building)
|
||||
- Trae installed and registered (for AI coding)
|
||||
|
||||
## 1.1 Definition of Android App
|
||||
|
||||
An Android App is a native application that runs on the Android operating system. Unlike mini programs, it does not depend on a host like WeChat. It runs directly at the system level. It has its own home-screen icon, launches quickly, feels smooth, and can deeply access system-level features such as Bluetooth, sensors, and background services.
|
||||
|
||||

|
||||
|
||||
## 1.2 Android App Development
|
||||
|
||||
Android development refers to the whole process of building such applications. In the Vibe Coding development mode used in this tutorial, with **AI-assisted programming**, the developer's role shifts from "code writer" to "product architect":
|
||||
|
||||
1. **You (architect / PM)**: responsible for business logic design, prompt writing, and final acceptance of the result.
|
||||
2. **Trae (AI engineer)**: responsible for executing instructions, converting natural language into standard Kotlin code and Jetpack Compose layouts, and handling syntax errors and logic details.
|
||||
3. **Android Studio (build factory)**: responsible for providing the compile environment, packaging code into a runnable app, and offering emulator previews.
|
||||
|
||||
## 1.3 Common Ways to Build Android Apps
|
||||
|
||||
In real development, there is more than one way to build Android apps. We will not go deep here, but only provide an overall understanding.
|
||||
|
||||
**The first way: Native Development**
|
||||
This is Google's official and recommended route. You directly use **Kotlin** and **Jetpack Compose** to develop. Its advantage is the best performance and full access to phone hardware.
|
||||
|
||||

|
||||
|
||||
**The second way: Cross-Platform Development**
|
||||
For example Flutter or React Native. The core idea is "write one codebase and generate both Android and iOS apps."
|
||||
|
||||
**The third way: Hybrid Development**
|
||||
In essence, this is wrapping a webpage inside an app shell. This is fast to develop, but the experience and smoothness are usually not as good as a native app, and it is difficult to build a polished, immersive small tool this way.
|
||||
|
||||
**This tutorial's choice: native development (Kotlin + Compose)** combined with AI tools for coding.
|
||||
The reason is simple: native Jetpack Compose code has a very clear structure and is highly suitable for AI to understand and generate. We do not need to handwrite code from scratch. Instead, we guide Trae with natural language to generate high-quality native code.
|
||||
|
||||

|
||||
|
||||
## 1.4 Android App Development Steps Covered in This Tutorial
|
||||
|
||||
To keep the learning process interesting, this tutorial revolves around a relaxing but technically representative case - **Electronic Wooden Fish**. We combine Trae's Vibe Coding mode with a route you can reuse repeatedly:
|
||||
|
||||
1. **Build understanding and environment**: understand what Android apps are, install Android Studio and Trae, and configure China-friendly mirrors so the toolchain works smoothly.
|
||||
2. **Build the project skeleton**: create a blank Android project that can successfully run in the emulator.
|
||||
3. **AI iterative development**: open the project in Trae, then through conversation with AI, gradually implement the wooden fish image, tap animation, sound effects, floating text, and more.
|
||||
4. **Real-device debugging and polishing**: move beyond the emulator, install the app on your actual phone, experience real vibration feedback, and let AI help investigate bugs.
|
||||
5. **Packaging and publishing**: generate a formal APK and understand how to share or release it.
|
||||
|
||||
This section only draws the big picture and does not expand all commands yet. For now, just remember the main line: **environment setup -> skeleton building -> AI description and generation -> real-device polishing -> packaging and delivery**. In the next chapters, we will take you through each step.
|
||||
|
||||
# 2 Development Environment Setup
|
||||
|
||||
## 2.1 Tools Used in This Tutorial
|
||||
|
||||
During the whole development process, we use three tools together, playing the roles of "design," "construction," and "acceptance."
|
||||
|
||||
- **Trae**: this is your **AI coding partner**. In Vibe Coding mode, we no longer need to type code line by line. Instead, we mainly tell AI in natural language what we want, and it handles code generation and modification.
|
||||
- **Android Studio**: this is Google's official **app build factory**. Although it has many buttons, in this tutorial we mainly use it to create the project skeleton and compile Trae-generated code into something installable on a phone.
|
||||
- **An Android device**: this acts as the **test terminal** for viewing the result. You can connect it to your computer for real-device debugging and feel real vibration feedback. If you do not have one, Android Studio's built-in **Emulator** can simulate a virtual phone perfectly, which is enough for early development.
|
||||
|
||||
## 2.2 Download Trae
|
||||
|
||||
Trae is our main battlefield for **Vibe Coding**. You can think of it simply as an **"AI-powered code editor."**
|
||||
|
||||
Visit the official website [https://www.trae.cn](https://www.trae.cn), download the version matching your computer system (Windows or Mac), and install it just like ordinary software by double-clicking the installer and following the prompts. Once this tool is ready, in later practice we will stop staring at boring code windows and instead open the project here and tell AI what to build using natural language.
|
||||
|
||||

|
||||
|
||||
## 2.3 Download Android Studio
|
||||
|
||||
We need Android Studio to provide the Android SDK and emulator required for running the app. Visit the official download page [https://developer.android.com/studio?hl=zh-cn](https://developer.android.com/studio?hl=zh-cn) and download the package for your operating system (this tutorial is based on **2025.2.3**). After downloading, install it like normal software, keeping the default options throughout.
|
||||
|
||||
**Special reminder for beginners:**
|
||||
|
||||
Although modern versions of Android Studio have greatly simplified configuration, it still depends on the **JDK (Java Development Kit)** under the hood. If this is your first time doing development, or if you encounter errors related to environment variables or SDK configuration during installation, do not panic. You can refer to this detailed setup guide (in Chinese): [Android Studio2024版本安装环境SDK、Gradle配置](https://blog.csdn.net/keiraee/article/details/142321644).
|
||||
|
||||

|
||||
|
||||
## 2.4 Create a New Project
|
||||
|
||||
Open Android Studio and click **New Project** on the welcome screen.
|
||||
|
||||
**Step 1: Choose a template**
|
||||
|
||||
In the template list, select **Empty Activity** (notice the Jetpack Compose icon on it).
|
||||
|
||||

|
||||
|
||||
**Step 2: Fill in project configuration**
|
||||
|
||||
Then you will see a configuration form. Fill it roughly as follows and keep the rest at default:
|
||||
|
||||
| **Field** | **Recommended Value** | **Explanation** |
|
||||
| ----------------- | -------------------------------------------------- | ---------------------------------------- |
|
||||
| **Name** | My Application 1 | App name shown on the phone home screen |
|
||||
| **Package name** | com.example.myapplication1 | Unique app identifier |
|
||||
| **Save location** | Custom path (for example `E:\AndroidProjects\Myapplication1`) | Project storage location; not recommended to place on C drive |
|
||||
| **Minimum SDK** | API 30 | Covers over 90% of active devices while balancing compatibility and features |
|
||||
| **Language** | Kotlin (recommended) | Kotlin is Google's officially recommended language, cleaner and safer |
|
||||
|
||||

|
||||
|
||||
**Step 3: Wait for project build**
|
||||
|
||||
Click **Finish**. Android Studio will automatically download dependencies and build the project (you will see a progress bar in the bottom-right corner).
|
||||
|
||||
- _Note: the first project creation may take several minutes. Wait patiently until the bottom progress finishes and the project file tree is fully loaded on the left._
|
||||
|
||||
## 2.5 Dependency Configuration: Gradle Download and Gradle Repository Mirrors
|
||||
|
||||
> This is one of the few steps in the Vibe Coding workflow where **manual operation** is recommended. Although AI can also help modify config, environment configuration touches low-level files, so manual changes are more reliable.
|
||||
|
||||
Why do we need to modify the configuration?
|
||||
|
||||
By default, Android Studio connects to overseas servers, so downloading build tools and dependencies may take an hour or even fail. After switching to domestic mirrors, it often finishes within a few minutes. **This is a one-time task that pays off forever.**
|
||||
|
||||
1. **Preparation**
|
||||
|
||||
If the bottom-right status bar of Android Studio is currently showing a progress bar like `Gradle Building...`, pause the ongoing dependency download first to avoid file conflicts.
|
||||
|
||||

|
||||
|
||||
2. **Speed up Gradle download**
|
||||
|
||||
In the project file tree on the left, expand `gradle` -> `wrapper`, then open `gradle-wrapper.properties`. Change the download source to Tencent's mirror:
|
||||
|
||||
```text
|
||||
distributionUrl=https\://mirrors.cloud.tencent.com/gradle/gradle-8.7-bin.zip
|
||||
```
|
||||
|
||||
Be careful: you only need to replace `services.gradle.org/distributions` with `mirrors.cloud.tencent.com/gradle`. Do not change anything else.
|
||||
|
||||

|
||||
|
||||
3. **Speed up dependency repository download**
|
||||
|
||||
Then, open `settings.gradle.kts` in the project root, and replace the content inside the `repositories` block with the following:
|
||||
|
||||

|
||||
|
||||
Replace the highlighted section with this code (latest source list as of 2025-02-21):
|
||||
|
||||
```kotlin
|
||||
// Aliyun mirrors (covering Maven Central, Google, JCenter, etc.)
|
||||
maven { setUrl("https://maven.aliyun.com/repository/public/") }
|
||||
maven { setUrl("https://maven.aliyun.com/repository/google/") }
|
||||
maven { setUrl("https://maven.aliyun.com/repository/jcenter/") }
|
||||
maven { setUrl("https://maven.aliyun.com/repository/gradle-plugin/") }
|
||||
// Huawei Cloud mirror
|
||||
maven { setUrl("https://repo.huaweicloud.com/repository/maven/") }
|
||||
// Tencent Cloud mirror
|
||||
maven { setUrl("https://mirrors.cloud.tencent.com/nexus/repository/maven-public/") }
|
||||
// NetEase mirror
|
||||
maven { setUrl("https://mirrors.163.com/maven/repository/maven-public/") }
|
||||
```
|
||||
|
||||
It should then look like the screenshot below:
|
||||
|
||||

|
||||
|
||||
4. **Save and apply changes**
|
||||
|
||||
At this point, save the file and click `Try Again` in the top-right corner. Android Studio will re-run the download. Wait a few minutes. When the console shows `BUILD SUCCESSFUL`, it means the environment setup is fully complete and we are ready to start coding.
|
||||
|
||||

|
||||
|
||||
## 2.6 Understand the Project Structure
|
||||
|
||||
After project creation succeeds, the **Project** panel will appear on the left. Switch to the **Android** view (default), and you will see key directories like this:
|
||||
|
||||
```text
|
||||
app/
|
||||
├── manifests/
|
||||
│ └── AndroidManifest.xml <- app "ID card", declares app name and entry Activity (MainActivity)
|
||||
│
|
||||
├── java/
|
||||
│ └── com.example.myapplication1/
|
||||
│ ├── MainActivity.kt <- app entry, builds UI with Jetpack Compose
|
||||
│ │
|
||||
│ └── ui/ <- controls the overall UI style (colors, fonts)
|
||||
├── res/
|
||||
│ ├── drawable/ <- image resources (for example ic_launcher.png)
|
||||
│ ├── mipmap/ <- app icon
|
||||
│ ├── values/ <- text, color, theme styles
|
||||
│ │ ├── colors.xml
|
||||
│ │ ├── strings.xml
|
||||
│ │ └── themes.xml
|
||||
│ └── xml/ <- system-related config files (not UI)
|
||||
└── build.gradle (Module: app) <- app build config (usually untouched at beginner stage)
|
||||
```
|
||||
|
||||
As beginners, we usually only need to focus on three files:
|
||||
|
||||
- `MainActivity.kt`: controls behavior and decides "what appears on the screen"
|
||||
- `AndroidManifest.xml`: registers components and decides "where the app starts"
|
||||
- `Theme.kt`: defines the visual appearance
|
||||
|
||||
# 3 Android App Development
|
||||
|
||||
In the first two chapters, we already understood what Android apps are and sharpened the two key tools: Trae and Android Studio. From this section on, we leave paper discussion and enter real practice. We will adopt Vibe Coding mode to build a very popular stress-relief app from scratch - **Electronic Wooden Fish**. It fits the "Vibe" theme well (simple and relaxing), while also covering three core parts of Android development: **UI interaction (tapping), data storage (merit count), and multimedia (sound effects)**.
|
||||
|
||||
Now, follow along and send the first instruction to AI.
|
||||
|
||||
## 3.1 The First "Master Prompt": From Zero to One
|
||||
|
||||
In Vibe Coding mode, we do not need to first create layout files and then write logic code as in traditional development. What we need to do is **describe the requirements clearly in one shot and let AI generate the first runnable prototype**.
|
||||
|
||||
Open the project directory we just created in Trae, and in the chat panel on the right, enter the following Prompt:
|
||||
|
||||
```text
|
||||
You are a senior Android development expert. Please rewrite the current MainActivity.kt and turn it into an "Electronic Wooden Fish" app. Requirements:
|
||||
1. The screen background is black.
|
||||
2. Display a wooden fish graphic in the center of the screen, moderate in size, in white.
|
||||
3. Show a line of white text above it: "Merit: 0".
|
||||
4. When the wooden fish in the center is tapped, the number increases by 1 and a simple scale animation effect appears (simulating the feeling of knocking).
|
||||
5. Use Jetpack Compose.
|
||||
```
|
||||
|
||||
After sending it, Trae will begin analyzing your project structure. A few seconds later, it will directly generate the full code for `MainActivity.kt`.
|
||||
|
||||
1. From its response, we can see its reasoning logic and interaction logic
|
||||
2. We can directly see which parts of the code were rewritten
|
||||
3. If we are not satisfied with the result, we can roll back to the previous version
|
||||
|
||||

|
||||
|
||||
## 3.2 Run and Preview (Emulator Debugging)
|
||||
|
||||
At this point, AI has completed the first round of development. But remember, what we see in Trae is only code "blueprints," not a real interactive app. Trae itself cannot directly run Android apps, so we need to rely on the **Virtual Device emulator** provided by Android Studio. It is like turning your computer screen into a virtual Android phone, allowing us to install the code immediately and view the real result.
|
||||
|
||||
Next, let us configure this "virtual phone."
|
||||
|
||||
**Step 1: Create the emulator**
|
||||
|
||||
Back in Android Studio, find and click **Device Manager** in the right toolbar. If you cannot find it, open it from `View -> Tool Windows -> Device Manager`.
|
||||
|
||||
In the panel, click **Add a new device**, then choose **Create Virtual Device** to enter the device selection window.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
In the hardware selection window, choose **Phone** and then **Smart Phone** (medium screen size), or any other device profile you prefer such as Pixel, then click **Next**.
|
||||
|
||||

|
||||
|
||||
**Step 2: Configure the system image**
|
||||
|
||||
In the **System Image** dialog, select **API 36.1**. If it has not been downloaded yet, click **Download** first, then select it after download is complete, and click **Finish**.
|
||||
|
||||

|
||||
|
||||
**Step 3: Start the emulator**
|
||||
|
||||
After successful creation, your new phone will appear in the device manager list. Click the **triangle play button** on the right. After a short wait, a phone-shaped window will pop up - this is your Android emulator.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
**Step 4: Run the app**
|
||||
|
||||
Now comes the magic moment. Make sure the emulator has started and is showing the home screen, then click the prominent **green Run triangle** in the top toolbar of Android Studio (or use the shortcut `Shift + F10` on Windows/Linux, `Control + R` on macOS). Android Studio will automatically compile the code written by Trae, package it as an app, and install it into the emulator.
|
||||
|
||||
Within seconds, you should see the emulator screen light up, showing a white wooden fish graphic in the center with the text "Merit: 0" above it. Try tapping it and see whether the number increases and the animation works. This is your first Android app.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## 3.3 Optimization Iteration (Add Assets and Sound)
|
||||
|
||||
At this stage, our app already has a basic shape: tapping increases the number. But it is still just a "mute" white geometric shape, lacking fun. Next, we will make the Electronic Wooden Fish much more immersive by adding a real image and knock sound effect.
|
||||
|
||||
**This is exactly the most attractive part of Vibe Coding mode.** In traditional development, adding sound effects and more complex animations is often a beginner's nightmare. You need to manage `MediaPlayer` resource loading and releasing (otherwise memory leaks may happen), and also calculate animation curves. In Vibe Coding mode, you do not need to care about these low-level details at all. You only need to tell AI like a director: "change the prop and add a sound effect when tapped," and the implementation appears immediately.
|
||||
|
||||
**Step 1: Prepare assets**
|
||||
You need one wooden fish image (`png`) and one knock sound effect (`mp3`).
|
||||
|
||||
- **Image asset**: copy the prepared `white_muyu.png` into `app/src/main/res/drawable`
|
||||
- **Audio asset**: in Android Studio, right-click the `res` folder in the left project panel, choose `New -> Android Resource Directory`, select **raw** as the resource type, click OK, then copy `voice.mp3` into the new `res/raw` folder. _(Note: if you plan commercial release, make sure you have legal rights to all assets.)_
|
||||
|
||||
Here are the image and sound assets I found for you. If it is inconvenient for you to search for your own, you can directly use them.
|
||||
|
||||

|
||||
|
||||
Knock sound download link: https://www.aigei.com/s?q=%E6%9C%A8%E9%B1%BC&type=sound
|
||||
Choose the first 1-second sound effect.
|
||||
|
||||

|
||||
|
||||
**Step 2: Send the iteration instruction**
|
||||
|
||||
After the assets are ready, go back to Trae. Trae will modify the code again and handle the audio-loading and animation logic for you. You only need to tell it which assets to use. Enter this Prompt:
|
||||
|
||||
```text
|
||||
I have added the assets. The image path is res/drawable/white_muyu.png and the sound effect path is res/raw/voice.mp3. Please update the code:
|
||||
1. Replace the wooden fish icon in the center with my image.
|
||||
2. Play the knocking sound every time the wooden fish is tapped.
|
||||
3. When tapped, show a temporary "+1" text above the wooden fish, then let it float upward and disappear (like floating score text in games).
|
||||
```
|
||||
|
||||

|
||||
|
||||
**Step 3: Verify the result**
|
||||
|
||||
After Trae finishes modifying the code, return to Android Studio and click the green Run button again (Re-run) to rebuild the app and relaunch it in the emulator. At this point, your app will feel transformed. Try tapping continuously - you should hear a crisp "tok tok" sound and see the floating "Merit +1" text jumping out. This completes the key transition from "demo" to "product."
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## 3.4 What If Bugs Appear? (Debugging Loop with AI)
|
||||
|
||||
AI-generated code is not guaranteed to be perfect on the first try, just like top engineers also cannot promise bug-free code in one shot. But in Vibe Coding mode, bugs are no longer a wall blocking you; they become stepping stones in your collaboration with AI.
|
||||
|
||||
**Case 1: the app crashes**
|
||||
|
||||
Suppose the app crashes immediately after clicking Run, or tapping the wooden fish does not play sound. Traditionally, you would need to search for the error code, browse dozens of technical forums, and read lots of difficult English posts. In Vibe Coding mode, you only need to do one thing - **be a courier**.
|
||||
|
||||
**Steps:**
|
||||
|
||||
1. **Open the log**: find the **Logcat** panel at the bottom of Android Studio (the small cat icon).
|
||||
2. **Locate the error**: you will see scrolling logs, and the **red lines** are usually the key errors.
|
||||
3. **Copy and paste**: select the red English error text, copy it, and paste it into Trae: "I got this error while running. Please help me fix it."
|
||||
4. AI may immediately tell you something like: "This happened because vibration permission was not declared in `AndroidManifest.xml`," and then give you the fixed code. You just click Apply and move on.
|
||||
|
||||
**Case 2: the app runs, but the experience feels bad**
|
||||
|
||||
Sometimes the app does not crash, but still feels unsatisfying. For example, when tapping the wooden fish very quickly, you may notice that new "+1" animations do not show up until the previous "+1" fully disappears. That makes the feedback feel laggy and not satisfying. You do not need to study multi-threading or animation queues yourself. You only need to clearly describe that discomfort to AI.
|
||||
|
||||
Send this "advanced instruction" to Trae:
|
||||
|
||||
```text
|
||||
Please modify the current animation logic to solve the "fast tapping does not trigger" problem.
|
||||
Current issue: it seems there is only one animation state, so I have to wait until the previous "+1" completely disappears before another click responds.
|
||||
Requirements:
|
||||
1. Replace the single animation state with a mutableStateListOf-based list.
|
||||
2. Every time the wooden fish is tapped, add a new "+1" instance immediately to the list (with its own ID and initial position), regardless of whether the previous animation has finished.
|
||||
3. In the UI, iterate through this list so each "+1" runs its own upward-floating + fade-out animation independently.
|
||||
4. After a "+1" animation finishes, automatically remove it from the list to prevent memory leaks.
|
||||
Please directly provide the updated MainActivity.kt code.
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## 3.5 Final Result Showcase
|
||||
|
||||
In the previous steps, we already completed an Electronic Wooden Fish that can be seen and heard. To make it closer to a publishable app, we will use one final iteration to add **touch feedback** and **customization**. We will implement two core features: first, **vibration feedback**, so every tap gets a physical response from the phone motor and greatly improves immersion; second, **custom text**, allowing users to modify the text on screen, for example changing "Merit +1" to "Salary +1" or "Trouble -1".
|
||||
|
||||
Send the following carefully designed Prompt to Trae. It will handle the dialog logic, state switching, and hardware interaction in one pass:
|
||||
|
||||
```text
|
||||
Role: You are an Android Jetpack Compose expert.
|
||||
Task: Please add "custom text" and "vibration feedback" to the existing Electronic Wooden Fish app.
|
||||
Requirements:
|
||||
1. Haptic Feedback
|
||||
Whenever the user taps the wooden fish, in addition to sound and animation, call the phone's haptic feedback (using LocalHapticFeedback.current) to give a light tactile response.
|
||||
2. Custom Text Feature (UI and interaction)
|
||||
Entry: Add a small edit icon next to the top text such as "Merit +1" (you can use Icons.Default.Edit).
|
||||
Dialog logic: When the icon is tapped, show a dialog (Dialog/AlertDialog).
|
||||
Dialog title: "Modify Content"
|
||||
Input: Allow the user to enter the text they want to accumulate (default is "Merit")
|
||||
Value choice: Below the input, provide two options (for example RadioButton or toggle) so the user can choose "+1" or "-1"
|
||||
Save button: After clicking save, close the dialog and apply the new settings to the home screen
|
||||
Data refresh: If the user updates the content, reset the top counter to 0 and start counting from zero again
|
||||
3. Effect update
|
||||
After saving, both the top counter text and the floating animation text shown when tapping the wooden fish should change to the user's custom format.
|
||||
The floating text size should not exceed the size of the top counter text
|
||||
Example: if the user enters "Salary" and chooses "+1", the top counter logic becomes +1 and the floating text becomes "Salary+1"
|
||||
If the user enters "Trouble" and chooses "-1", the top counter logic becomes -1 and the floating text becomes "Trouble-1"
|
||||
4. Technical requirements:
|
||||
Make sure the new state (text and number) correctly affects the animation.
|
||||
Please directly provide the full updated MainActivity.kt while keeping the previous sound and animation logic unchanged.
|
||||
```
|
||||
|
||||

|
||||
|
||||
# 4 Real-device Debugging and Polishing
|
||||
|
||||
The emulator is convenient, but it cannot simulate real phone vibration or fully reflect real touch latency. To get the most accurate "feel," we need to install the app on a real Android phone. Below are two connection methods you can choose from:
|
||||
|
||||
1. **Wireless debugging (Wi-Fi)**: no data cable required, convenient for daily checking. But your computer and phone must be on the **same Wi-Fi network**.
|
||||
2. **USB wired debugging**: more stable and less likely to disconnect, suitable when the network is poor or initial installation fails.
|
||||
|
||||
## 4.1 Wireless Debugging
|
||||
|
||||
This is the most convenient method on Android 11 and above.
|
||||
|
||||
**Step 1: Prepare the phone**
|
||||
|
||||
1. Make sure the phone and computer are on the **same Wi-Fi**.
|
||||
2. Open **Developer options** and enable **Wireless debugging**.
|
||||
3. Tap **Wireless debugging** to enter details, then choose **Pair device with QR code**. Your phone will open a scanner view.
|
||||
|
||||

|
||||
|
||||
**Step 2: Pair on the computer**
|
||||
|
||||
1. Back in Android Studio, click the device selector in the top toolbar.
|
||||
2. Choose **Pair Devices Using Wi-Fi** from the dropdown.
|
||||
3. A QR code will pop up on screen.
|
||||
|
||||

|
||||
|
||||
**Step 3: Scan to connect**
|
||||
|
||||
1. Use your phone to scan the QR code on your computer screen.
|
||||
2. Both the phone and computer should show "pairing successful."
|
||||
3. At this point, Android Studio's top device bar will automatically display your phone model (for example `Google Pixel 8`).
|
||||
|
||||

|
||||
|
||||
4. Run the app by clicking ▶️ Run
|
||||
|
||||

|
||||
|
||||
## 4.2 USB Wired Debugging
|
||||
|
||||
If wireless connection is unstable, or your network is complicated, plugging in with a cable is always the most reliable solution. Although it is less convenient, it gives the fastest transfer speed and almost never disconnects.
|
||||
|
||||
### 4.2.1 Prepare USB Driver in Android Studio (Windows only)
|
||||
|
||||
Mac users can skip this step, because macOS usually recognizes the phone directly. Windows users need to make sure the computer can recognize the Android phone, which usually means installing Google's USB driver:
|
||||
|
||||
1. In Android Studio, click `Tools -> SDK Manager` (or find it under `Settings -> Languages & Frameworks -> Android SDK`)
|
||||
2. Switch to the **SDK Tools** tab
|
||||
3. Check **Google USB Driver** and click **Apply** to download and install it
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
### 4.2.2 Download the Same SDK Version as Your Real Device
|
||||
|
||||
**Step 1: Check the phone's Android version**
|
||||
|
||||
Using an OPPO phone as an example: open Settings -> About phone -> check Android version (in the example it is Android 12).
|
||||
|
||||

|
||||
|
||||
**Step 2: Download that Android platform version in Android Studio**
|
||||
|
||||
1. In Android Studio, click `Tools -> SDK Manager`
|
||||
2. Stay in the default **SDK Platforms** tab
|
||||
3. Select the platform version that matches your phone (Android 12.0 in this example) and click **Apply** to download it
|
||||
|
||||

|
||||
|
||||
### 4.2.3 Enable Developer Mode on the Phone
|
||||
|
||||
Open your phone settings, go into developer options, and turn on **USB debugging**.
|
||||
|
||||

|
||||
|
||||
### 4.2.4 Install the USB Driver Authorization on the Phone
|
||||
|
||||
Connect the phone to the computer with a USB cable, then pick up your phone. It should show an important security dialog: "Allow USB debugging?" Make sure to check **Always allow** and then tap **Allow** or **OK**. This is the key authorization that lets the computer control the phone for debugging.
|
||||
|
||||

|
||||
|
||||
### 4.2.5 Run the App on the Phone
|
||||
|
||||
1. In Android Studio's top device selector, you should now see your phone model (for example `OPPO-PDKM00`)
|
||||
2. Click ▶️ Run. If your phone shows the "Allow USB debugging?" dialog again, check "Always allow" and confirm
|
||||
3. The app will automatically install and launch
|
||||
|
||||
Now try tapping the wooden fish on your phone and feel the real vibration motor response. This is the full Vibe Coding experience.
|
||||
|
||||

|
||||
|
||||
# 5 Package the App as APK
|
||||
|
||||
The code is done, and the real-device test also works. Now we need to "take the app out" of Android Studio and turn it into a file you can send to friends for installation. This process is called **packaging**. In Android development, packaging has two completely different modes, and we choose based on the usage scenario.
|
||||
|
||||
## 5.1 Package the Debug Version (for Quick Sharing)
|
||||
|
||||
If you only want to share the app with friends for a quick try, or send it to test phones for verification, the **Debug version** is the fastest option. It is like a "draft" - fully functional, but not formally signed, so it cannot be submitted to app stores.
|
||||
|
||||
**The steps are very simple:** in the top menu of Android Studio, find `Build`, hover over `Generate App Bundles or APKs`, and click `Generate APKs` from the submenu.
|
||||
|
||||

|
||||
|
||||
Wait a few seconds (the exact time depends on project size). In the bottom-right console area of Android Studio, a prompt will appear. Click the blue `locate` link and the output folder will open automatically. The file named `app-debug.apk` is the package we want.
|
||||
|
||||
You can send it directly through WeChat or QQ to any Android phone, and the recipient can install and use it. Note that a debug build is not a release version: it is not formally signed and cannot be submitted to app stores.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## 5.2 Package the Release Version
|
||||
|
||||
If you want to publish the app to an app store (such as Google Play or Huawei AppGallery), or avoid the "unsafe app" warning during installation, then you must package a **Release version**. This version requires a unique **digital signature**, which is like an anti-counterfeit seal proving that you developed this app and that it has not been tampered with.
|
||||
|
||||
> Core purpose of signing
|
||||
>
|
||||
> - Verify the publisher's identity: an app with the same package name can replace an installed one, so the signature ensures that only the original developer can ship such a replacement
|
||||
> - Ensure app integrity: the signing process covers every file in the package, ensuring they are not replaced afterward
|
||||
|
||||
Android app signing is like attaching a seal. After the seal is attached, the app and the developer are locked together: the app is yours, and you are responsible for it. Others cannot impersonate you, and you cannot impersonate others.
|
||||
|
||||
**Step 1: Start the signing wizard**
|
||||
|
||||
In the top menu, select `Build`, then click `Generate Signed Bundle / APK`. In the popup window, you will face two choices:
|
||||
|
||||
- Android App Bundle (`.aab`): required by Google Play, smaller in size, but cannot be directly installed on a phone
|
||||
- APK: standard install package, can be installed directly
|
||||
_For demonstration, we choose APK first and click Next._
|
||||
|
||||

|
||||
|
||||
**Step 2: Create a digital key (KeyStore)**
|
||||
|
||||
This is where beginners get stuck most often. Because this is your first release packaging, you need to create a new **keystore**. Click **Create new** below `Key store path`.
|
||||
|
||||

|
||||
|
||||
In the popup, fill in the required information, similar to registering an account. We strongly recommend that the keystore password and the key password be **the same**, and that you **write them down carefully**. If you lose the keystore file or its passwords, you will no longer be able to publish updates to this app.
|
||||
|
||||
After finishing, click OK. You will return to the previous screen, and the key information you just filled in will already be populated automatically.
|
||||
|
||||

|
||||
|
||||
**Step 3: Generate the formal package**
|
||||
|
||||
Click Next, choose **release** under Build Variants, and finally click **Create**.
|
||||
|
||||
After a short wait, Android Studio will again show a "Generate Signed APK" success prompt in the bottom-right corner. Click **locate**, and this time you will see the digitally signed formal package in the folder (usually named `app-release.apk`). This file is the final product you deliver as a developer.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
# 6 Official Release to App Stores / Markets
|
||||
|
||||
When your app development is finished and the Release package is ready, the next step is to publish it so more people can download and use it. Right now, the main distribution channels are divided into two categories: **domestic Android app stores** and **overseas app stores (Google Play)**.
|
||||
|
||||
## 6.1 Publish to Domestic Markets
|
||||
|
||||
The Android ecosystem in mainland China is special. There is no single official store (because Google Play is not directly accessible). Instead, the market is split between **phone-maker app stores** and **third-party platforms**. The major **manufacturer stores** include Huawei, Xiaomi, OPPO, vivo, Meizu, Samsung, etc. Since they are preinstalled on devices, they have the largest traffic. The main **third-party platforms** include Tencent MyApp and 360 Mobile Assistant.
|
||||
|
||||
### 6.1.1 The Core Difficulty: The "Roadblock" for Individual Developers
|
||||
|
||||
Before registering an account, there is one very important thing you must know: **domestic app markets are very strict with individual developers**.
|
||||
|
||||
At present, almost all major domestic app stores (Huawei, Xiaomi, OPPO, vivo, Tencent MyApp, etc.) **require** a *Software Copyright Registration Certificate* for submission.
|
||||
|
||||

|
||||
|
||||
- **What is it?** It is a legal document proving that the app belongs to you.
|
||||
- **Cost to obtain it**: you need to apply through the copyright bureau. Doing it yourself usually takes 2-3 months; using an agency for faster processing may cost from several hundred to over a thousand RMB.
|
||||
- **Current reality**: without this certificate, your app will very likely fail review, or you may not even be able to create the app entry. In addition, categories such as news, finance, and healthcare may also require ICP filing or other qualifications.
|
||||
|
||||
So if your app is just a personal practice project or small tool, and you do not want to spend time and money applying for this certificate, I suggest jumping directly to Section 6.2 and considering Google Play instead, or simply sharing the APK file with friends directly.
|
||||
|
||||
### 6.1.2 Register a Developer Account
|
||||
|
||||
If you have already prepared the required qualifications, or have decided to publish in domestic markets, the first step is account registration. The process is similar across major platforms, usually requiring ID verification for individuals or business license verification for companies.
|
||||
|
||||
Below are the developer platform URLs for major app markets:
|
||||
|
||||
Tencent Open Platform: https://open.tencent.com/
|
||||
|
||||
360 Open Platform: http://dev.360.cn
|
||||
|
||||
Baidu Developer Platform: http://app.baidu.com
|
||||
|
||||
Xiaomi Open Platform: https://dev.mi.com
|
||||
|
||||
Huawei Developer Alliance: http://developer.huawei.com/consumer/cn
|
||||
|
||||
Alibaba Developer Platform: http://open.uc.cn
|
||||
Alibaba distribution integrates Wandoujia, Ali Jiuyou, PP Assistant, UC App Store, Shenma Search, and YunOS App Store. You only need to register one Alibaba developer account.
|
||||
|
||||
Samsung Developer Platform: http://support-cn.samsung.com/App/DeveloperChina/Home/Index
|
||||
|
||||
OPPO Developer Alliance: http://open.oppomobile.com
|
||||
|
||||
vivo Developer Alliance: https://dev.vivo.com.cn
|
||||
|
||||
Lenovo Open Platform: http://open.lenovo.com
|
||||
|
||||
Meizu Developer Alliance: http://open.flyme.cn
|
||||
|
||||
Gionee Developer Alliance: https://open.appgionee.com
|
||||
|
||||
**Using Tencent MyApp as an example:** visit the Tencent Open Platform and click register. It is recommended to log in directly with a QQ account. Note that once a QQ account is bound, it is difficult to unbind, so it is better to use a dedicated work QQ account. Follow the prompts, choose "Individual Developer" or "Enterprise Developer," upload your ID photos, and complete face verification. After passing verification, click **Create App** to start.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
### 6.1.3 Submission Flow and Required Materials
|
||||
|
||||
After account review is approved, you can create the app and submit it for review. You need to prepare the following "four-piece set":
|
||||
|
||||
1. **Installation package**: the **Release APK** packaged in Chapter 5
|
||||
2. **Text information**:
|
||||
   - **App name**: must not contain sensitive words
|
||||
   - **One-line intro**: within 20 Chinese characters, simple and direct (for example: "A relaxing electronic wooden fish app")
|
||||
   - **Detailed description**: 200+ Chinese characters introducing the app's functions and usage scenarios
|
||||
3. **Visual materials**:
|
||||
   - **App icon**: high-definition PNG, usually 512x512
|
||||
   - **App screenshots**: prepare 4-5 clear screenshots of the app in use, preferably covering the main pages, usually in a consistent size such as 1080x1920
|
||||
4. **Qualification document**: upload a scanned copy of your Software Copyright Registration Certificate
|
||||
|
||||
**Submission and review:** after filling in all information and uploading the APK, click **Submit for Review**. The review cycle is usually 1-3 business days. During that period, pay attention to email or SMS. Reviewers may reject the submission because screenshots are unclear, descriptions are not standardized, or required qualifications are missing. In that case, you revise according to the feedback and resubmit.
|
||||
|
||||
## 6.2 Publish to Overseas Market (Google Play)
|
||||
|
||||
If you do not want to deal with the complexity of software copyright certificates and filings in domestic app stores, or if your target audience is global, Google Play is the best choice for individual developers.
|
||||
|
||||
### 6.2.1 Preparation
|
||||
|
||||
- **Google account**: a normal Gmail account is enough
|
||||
- **$25 registration fee**: this is a **one-time lifetime fee**, and requires a credit card that supports USD payments (Visa / Mastercard)
|
||||
- **Reliable network access**: you need to be able to access Google Play Console smoothly
|
||||
- **Formal installation package**: note that Google Play requires the **.aab** (Android App Bundle) format, not APK. In Android Studio, choose Android App Bundle during packaging. The steps are almost identical to packaging APK.
|
||||
|
||||

|
||||
|
||||
### 6.2.2 Google Play Console Release Process (Overview)
|
||||
|
||||
Because Google Play registration and payment still have some entry barriers (such as the need for an overseas credit card), this tutorial does not currently provide step-by-step screenshots. But here is the common four-step process:
|
||||
|
||||
**Step 1: Create an app and enter the console**
|
||||
|
||||
Click `Create app`, fill in the app name (`Electronic Wooden Fish`), choose English as the language, choose App and Free as the app type, then check the agreement. After that, you will have access to the backend.
|
||||
|
||||
**Step 2: Decorate the store page**
|
||||
|
||||
This is the user's first impression. You need to upload the prepared app **icon** (512x512) and a **feature graphic** (1024x500). As for the English description, you can simply ask Trae: **"Please help me write an English description for publishing Electronic Wooden Fish on Google Play, in a light and relaxing tone."** AI usually writes it more naturally than a direct translation.
|
||||
|
||||
**Step 3: Privacy and content rating**
|
||||
|
||||
- Privacy policy: search for "App Privacy Policy Generator" and generate a free link to paste in
|
||||
- Content rating: fill out a simple questionnaire (for example, whether there is violence or gambling). Electronic Wooden Fish usually gets a general 3+ rating.
|
||||
|
||||
**Step 4: Upload and publish**
|
||||
|
||||
Under the `Production` menu, click `Create new release`, upload your `.aab` file, save, and submit for review. Google Play review is usually fast (1-3 days). Once approved, your app can be downloaded worldwide.
|
||||
|
||||

|
||||
|
||||
_If you have already completed developer account registration, this video tutorial can guide you through the rest of the process:_ [Android应用上传GooglePlay谷歌市场全流程教程](https://www.bilibili.com/video/BV16REQzGEnk/?share_source=weixin&vd_source=b42f227a4f2d413fbde18499d83227cf)
|
||||
|
||||
# 7 Final Notes
|
||||
|
||||
That brings us to the end of the tutorial. Looking at the Electronic Wooden Fish you personally created on your phone, I wonder how you feel now.
|
||||
|
||||
As someone trained in software engineering, I actually feel quite emotional in today's fast-developing AI era. In the past, we worked through thick programming books, learned complex syntax, struggled with environment setup, and spent half of our day fighting red error messages. But times have changed, and now we are increasingly learning how to direct AI.
|
||||
|
||||
Through this Vibe Coding practice, you have already experienced the full Android app development process. The technical barrier is indeed getting lower. We no longer need to grind through dry code all the time, and can spend more energy on deciding **what to build**. But no matter how strong the tools are, they are still just tools. Do not let this app gather dust on your phone. Keep tinkering with it, break it and fix it again. Only when you start having your own ideas and bringing them to life do you truly cross the threshold.
|
||||
|
||||
If this tutorial helped you feel that "building an app is not actually that hard," then I am honored to have helped bring one more new-generation builder into the development world.
|
||||
|
||||
I am really looking forward to your next creation. Keep going!
|
||||
|
||||

|
||||
|
||||
**_Hope you have fun in the world of Android development!_**
|
||||
|
||||
# References
|
||||
|
||||
CSDN: [(2024.03.04)如何打包Android Studio项目?](https://blog.csdn.net/GenuineMonster/article/details/136443130?ops_request_misc=&request_id=&biz_id=102&utm_term=android%20studio%20%E6%89%93%E5%8C%85%20APK%20%E5%B9%B6%E5%88%86%E4%BA%AB&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-1-136443130.142^v102^pc_search_result_base4&spm=1018.2226.3001.4187)
|
||||
|
||||
CSDN: [Android Studio安装及配置](https://blog.csdn.net/Changersh/article/details/149838228?ops_request_misc=&request_id=&biz_id=102&utm_term=android%20studio%E5%AE%89%E8%A3%85%E5%8F%8A%E9%85%8D%E7%BD%AE&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-0-149838228.142^v102^pc_search_result_base4&spm=1018.2226.3001.4187)
|
||||
@@ -0,0 +1,734 @@
|
||||
# How to Build an iOS App - Native SwiftUI Development
|
||||
|
||||
## Chapter 1: What an iOS App and iOS App Development Are
|
||||
|
||||
In this tutorial, we will complete a full closed loop: **from an idea in your mind to a real iOS app that can be successfully installed and run on an iPhone.**
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
1. A Mac running a relatively recent macOS
|
||||
2. An iPhone running a relatively recent iOS version, with developer mode enabled
|
||||
3. Xcode successfully installed
|
||||
4. Trae installed and opened
|
||||
5. A usable Apple ID
|
||||
|
||||

|
||||
|
||||
### 1.1 iOS App
|
||||
|
||||
An iOS App is a native application running on the iPhone operating system. It launches quickly, feels smooth, and can deeply use system features such as notifications, camera, and local storage.
|
||||
|
||||

|
||||
|
||||
### 1.2 iOS App Development
|
||||
|
||||
At its core, building an iOS App only involves a few things:
|
||||
|
||||
1. Clarify the problem your app is solving
|
||||
2. Design the interface users can see and operate
|
||||
3. Define how the app behaves under different actions
|
||||
4. Build the app correctly and install it on an iPhone
|
||||
|
||||
### 1.3 Common Ways to Build iOS Apps
|
||||
|
||||
In real development, there is more than one way to build an iOS App. We will not go deep here, but only provide an overall understanding.
|
||||
|
||||
The first way is Apple's official native approach: create a project in Xcode and use Swift and SwiftUI to build the interface and logic.
|
||||
|
||||

|
||||
|
||||
The second way is to use cross-platform frameworks, such as React Native and Flutter, and adapt one codebase to multiple platforms.
|
||||
|
||||

|
||||
|
||||
Based on the approaches above, this tutorial chooses: **native SwiftUI development as the foundation, with AI tools doing the majority of the coding work**.
|
||||
|
||||

|
||||
|
||||
### 1.4 iOS App Development Steps Covered in This Tutorial (High-Level Preview)
|
||||
|
||||
The sample app used in this tutorial is **FridgeChef**.
|
||||
|
||||
The user enters the ingredients currently available in the fridge, and the app uses a real AI API to generate a feasible recipe, then saves the result locally for later review. This example fully covers the core parts of a real iOS application, including UI input and display, network requests, data parsing, local storage, and final installation and running on a real device.
|
||||
|
||||

|
||||
|
||||
- The overall idea from prototype to native app
|
||||
|
||||
In implementation, this tutorial adopts a staged approach. We will first use AI to quickly generate an interface prototype with HTML and CSS, confirm the layout structure and information hierarchy in the browser, and then migrate it into SwiftUI.
|
||||
|
||||
- Overall development flow preview
|
||||
|
||||
Overall, the following chapters will go through these stages in order:
|
||||
|
||||
1. Build basic understanding
|
||||
Understand the shape of an iOS app, common development methods, and what problem this sample app solves.
|
||||
2. Complete environment setup
|
||||
Prepare a Mac and an iPhone, update the systems, install Xcode and Trae, and create a basic iOS project that can run successfully in the simulator.
|
||||
3. Enter formal development
|
||||
Open the project in Trae and gradually generate the UI and basic interaction through conversation with AI, turning the app from an empty shell into something usable.
|
||||
4. Debug and organize
|
||||
When compilation errors appear or behavior does not match expectations, let AI help troubleshoot; when the structure becomes messy, use AI to refactor and simplify it.
|
||||
5. Run on a real device
|
||||
Configure signing, install the app on a real iPhone, and complete one full verification from code to hardware.
|
||||
|
||||
## Chapter 2: Development Environment Preparation
|
||||
|
||||
### 2.1 Required Devices and Systems
|
||||
|
||||
In this practice, two pieces of hardware are irreplaceable: a Mac and an iPhone.
|
||||
At the same time, both devices should be running **a relatively recent official system version**.
|
||||
|
||||
#### 2.1.1 Mac
|
||||
|
||||
iOS apps can only be developed and compiled on macOS. This is a hard requirement of Apple's platform.
|
||||
|
||||
To ensure Xcode can be installed and used normally, it is recommended that you update macOS to a relatively recent official version first. You can check and update from **System Settings -> General -> Software Update**.
|
||||
|
||||

|
||||
|
||||
#### 2.1.2 Real iPhone Device
|
||||
|
||||
In addition to the Mac, this tutorial also requires a real iPhone for verifying whether the app can be installed and launched correctly.
|
||||
|
||||
To keep the debugging process smooth, the iPhone should also run a relatively recent iOS version. You can check and update from **Settings -> General -> Software Update**.
|
||||
|
||||

|
||||
|
||||
Later in development, this iPhone will be connected to the Mac by cable for real-device debugging.
|
||||
|
||||
#### 2.1.3 Enable Developer Mode on iPhone
|
||||
|
||||
To install and run debug apps from Xcode on a real device, you need to enable developer mode on the iPhone.
|
||||
|
||||
Steps:
|
||||
|
||||
1. Open **Settings**
|
||||
2. Enter **Privacy & Security**
|
||||
3. Scroll to the bottom and find **Developer Mode**
|
||||
4. Turn it on, then restart the device as prompted
|
||||
5. After restart, unlock the device and confirm enabling developer mode
|
||||
|
||||

|
||||
|
||||
If your iPhone has never been connected to Xcode or other development tools before, you may find that **Developer Mode** does not appear under **Privacy & Security**. This is not a system issue - it simply means developer mode has not yet been triggered.
|
||||
|
||||
In that case, you can make it appear by following these steps:
|
||||
|
||||
1. Open **Settings -> Privacy & Security -> Analytics & Improvements**
|
||||
2. Turn on **Share With App Developers**
|
||||
3. Go back one level, enter **Privacy & Security** again, and scroll to the bottom
|
||||
4. You should now see **Developer Mode**, then enable it and restart the device
|
||||
|
||||
After completing the above steps, developer mode only needs to be enabled once. Future real-device debugging with Xcode will not require repeating this configuration.
|
||||
|
||||

|
||||
|
||||
### 2.2 Required Software
|
||||
|
||||
After devices and systems are ready, you still need to install the software used for development. This tutorial only uses two categories of tools: the official iOS development tool and the AI-assisted development tool.
|
||||
|
||||
#### 2.2.1 Xcode
|
||||
|
||||
Xcode is Apple's official development tool for iOS. In this tutorial, it is mainly used to create iOS projects, compile Swift / SwiftUI code, and run the app on the simulator or a real device.
|
||||
|
||||

|
||||
|
||||
Xcode can be found and installed directly from the App Store. After installation, when you open it for the first time, you will see the welcome screen. Later project creation starts from there.
|
||||
|
||||

|
||||
|
||||
#### 2.2.2 Trae
|
||||
|
||||
Trae is the main environment where development work is performed in this tutorial. You will place the whole iOS project into Trae and collaborate with AI through dialog to complete development.
|
||||
|
||||

|
||||
|
||||
### 2.3 Apple ID and Development Debugging Notes
|
||||
|
||||
On the iOS platform, in order for an app to be installed on a real device, it must go through developer signing. This tutorial does not require you to pay for Apple Developer Program membership. A personal Apple ID is enough.
|
||||
|
||||
### 2.4 Checklist Before Moving On
|
||||
|
||||
Before entering the next chapter, you can compare your current state with the checklist below.
|
||||
|
||||
You should now already have:
|
||||
|
||||
1. A Mac running a relatively recent macOS
|
||||
2. An iPhone running a relatively recent iOS version with developer mode enabled
|
||||
3. Xcode successfully installed
|
||||
4. Trae installed and opened
|
||||
5. A usable Apple ID
|
||||
|
||||
If all of these are ready, you can continue and create your first iOS app.
|
||||
|
||||
## Chapter 3: Create the First iOS Project
|
||||
|
||||
### 3.1 Use Xcode to Create a New Project
|
||||
|
||||
Open Xcode. On the welcome screen, choose to create a new project.
|
||||
|
||||

|
||||
|
||||
Click **Create new project** to enter the project template selection screen.
|
||||
|
||||
### 3.2 Choose App Template and Tech Stack
|
||||
|
||||
On the template selection screen, use the following configuration:
|
||||
|
||||
1. Platform: iOS
|
||||
2. Application type: App
|
||||
|
||||

|
||||
|
||||
Click **Next** to enter the project information configuration screen.
|
||||
|
||||
### 3.3 Configure Project Information
|
||||
|
||||
On the project information screen, just fill in the basic settings:
|
||||
|
||||
1. Product Name: app name (for example `FridgeChef`)
|
||||
2. Team: choose your personal Apple ID
|
||||
3. Organization Identifier: reverse-domain format (for example `com.example`)
|
||||
4. Bundle Identifier: generated automatically, keep default
|
||||
5. Testing System: Swift Testing with XCTest UI Tests
|
||||
6. Storage: choose Core Data (for later saving recipe history)
|
||||
7. Leave the other options at default
|
||||
|
||||

|
||||
|
||||
Click **Next** and choose the project storage location.
|
||||
|
||||

|
||||
|
||||
### 3.4 Recognize the Project Structure After Creation
|
||||
|
||||
After the project is created, Xcode will automatically open the workspace. At this point, you do not need to understand every file. You only need to recognize a few key parts.
|
||||
|
||||

|
||||
|
||||
In the default project, you will see:
|
||||
|
||||
- A folder named after the project
|
||||
- A Swift file ending with `App` (the application entry)
|
||||
- A `ContentView.swift` file (the default page)
|
||||
|
||||
This is already the smallest runnable iOS App.
|
||||
|
||||
### 3.5 Run the First iOS App
|
||||
|
||||
Before changing any code, run the original project directly.
|
||||
|
||||
In the top toolbar of Xcode, keep the default iPhone simulator selected, then click the **Run** button on the top left.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
If everything is normal, the simulator will show a blank app that can start successfully. The first compilation may take a relatively long time. In later chapters, we reduce waiting time by using HTML prototypes first.
|
||||
|
||||

|
||||
|
||||
To stop the app, click **Stop** next to the Run button.
|
||||
|
||||
### 3.6 What You Have Actually Achieved at This Stage
|
||||
|
||||
Even though the interface is still simple, you have already completed several key confirmations:
|
||||
|
||||
1. The project can compile successfully
|
||||
2. The simulator can run the app correctly
|
||||
3. The development process has already been proven to work end-to-end
|
||||
|
||||
This means that future problems will mainly focus on **the code and logic themselves**, rather than environment issues.
|
||||
|
||||
### 3.7 Hand the Project Over to Trae
|
||||
|
||||
Starting from the next section, the main development work will gradually move into Trae.
|
||||
|
||||
What you need to do is simple: **open the iOS project folder you just created in Trae.**
|
||||
|
||||

|
||||
|
||||
## Chapter 4: AI-Assisted Development Practice - Build FridgeChef from Scratch
|
||||
|
||||
This chapter is the core part of the entire tutorial.
|
||||
|
||||
This tutorial does not use the traditional route of "write SwiftUI first, repeatedly compile, and keep tweaking previews." Instead, we use a more efficient flow:
|
||||
**first use HTML to quickly validate the interface structure, then migrate the confirmed result into SwiftUI, and finally gradually complete business logic, local data, and interaction details.**
|
||||
|
||||
### 4.1 Stage One: Requirement Clarification
|
||||
|
||||
Before writing code, the first step is not building pages - it is clarifying what we are building. **Let AI first act like a product manager and organize the requirements into a structured specification document.**
|
||||
|
||||
In Trae's chat window, enter the following instruction. Trae will generate a `REQUIREMENTS.md` file in the project root, describing the functionality and structure of the whole app.
|
||||
|
||||
📋 **Prompt to copy:**
|
||||
|
||||
```text
|
||||
We are now going to develop an iOS App called "FridgeChef".
|
||||
|
||||
1. Core concept
|
||||
This is an AI assistant that solves the problem of "I don't know what to cook with the leftover ingredients in my fridge."
|
||||
Users input the ingredients they currently have, and the app calls a large model to generate a practical recipe.
|
||||
|
||||
2. Core functions
|
||||
- Home page:
|
||||
Show a prominent "Start Cooking" entry, and below it display historical recipe records in card or list form.
|
||||
- Input page:
|
||||
Users input ingredients, supporting text input or simple quick tags.
|
||||
- Result page:
|
||||
Display the AI-generated recipe, including dish name, ingredient list, and cooking steps.
|
||||
|
||||
3. Technical requirements
|
||||
- Use SwiftUI
|
||||
- Save data locally (Core Data)
|
||||
- Support basic page navigation and state updates
|
||||
|
||||
Please help me organize this into a clear, structured REQUIREMENTS.md document from the perspective of a product manager, and save it in the project root.
|
||||
```
|
||||
|
||||
After generation, quickly read through the document and confirm whether the function points match your expectations.
|
||||
|
||||

|
||||
|
||||
### 4.2 Stage Two: Visual Prototype
|
||||
|
||||
Let AI quickly draw a high-fidelity interface prototype using **HTML + CSS**, so we can confirm the overall layout and style first. Continue by entering this in Trae:
|
||||
|
||||
📋 **Prompt to copy:**
|
||||
|
||||
```text
|
||||
The requirements are confirmed.
|
||||
Please use HTML + Tailwind CSS to generate a high-fidelity interface prototype for me.
|
||||
|
||||
Design style: Neo-Pop
|
||||
Colors:
|
||||
- Background: light cream #FFFDF5
|
||||
- Accent colors: acid green #CCFF00, hot pink
|
||||
|
||||
Visual characteristics:
|
||||
- 3px thick black borders
|
||||
- Hard shadow without blur (offset 4px)
|
||||
- Large rounded cards, overall sticker / comic feeling
|
||||
|
||||
Layout requirements:
|
||||
- Home page should use a Bento Grid-like layout
|
||||
- Include two screens: home page and input page
|
||||
|
||||
Please generate a single-file index.html and simulate an iPhone screen ratio around the content.
|
||||
```
|
||||
|
||||
After generation, find `index.html` in the file list and open it directly in a browser.
|
||||
|
||||

|
||||
|
||||
At this stage, the point is not whether every detail is perfect. The point is whether **the page structure is reasonable, the main elements are complete, and the overall direction is correct.**
|
||||
|
||||
### 4.3 Stage Three: Native Recreation
|
||||
|
||||
Once the HTML prototype is finalized, **translate the confirmed interface into SwiftUI.**
|
||||
|
||||
Steps:
|
||||
|
||||
1. Upload the `index.html` file (or a browser screenshot) into Trae
|
||||
2. Tell AI to generate SwiftUI code based on it
|
||||
|
||||
📋 **Prompt to copy:**
|
||||
|
||||
```text
|
||||
[index.html uploaded]
|
||||
|
||||
Please read the layout and style of this HTML file.
|
||||
|
||||
Task: recreate this interface in the current project using SwiftUI.
|
||||
|
||||
Requirements:
|
||||
1. Encapsulate a NeoPopStyle modifier including background color, thick border, and hard shadow
|
||||
2. Create HomeView.swift for the home layout
|
||||
3. Create InputView.swift for the input page
|
||||
4. Use Mock Data for now, and make sure it can display correctly in Xcode Preview and simulator
|
||||
```
|
||||
|
||||
After it finishes, open Xcode and run the simulator. You will see an iOS app that already has a complete visual structure.
|
||||
|
||||

|
||||
|
||||
### 4.4 Stage Four: Connect the AI API
|
||||
|
||||
Once the interface is done, the app is still only a display layer. Next we need to connect real AI capability. In this tutorial we use the large-model service provided by **SiliconFlow**:
|
||||
[https://cloud.siliconflow.cn](https://cloud.siliconflow.cn/)
|
||||
|
||||

|
||||
|
||||
SiliconFlow provides an API compatible with the OpenAI API specification, so it is very convenient to call from an iOS project using standard network requests.
|
||||
|
||||

|
||||
|
||||
Before starting, you need to register an account on the site and create an API Key.
|
||||
|
||||

|
||||
|
||||
This Key will be used for later model calls.
|
||||
|
||||
📋 **Prompt to copy:**
|
||||
|
||||
```text
|
||||
Now we need to connect AI capability.
|
||||
|
||||
Please create APIService.swift.
|
||||
|
||||
Configuration:
|
||||
- Base URL: https://api.siliconflow.cn/v1
|
||||
- Model: Qwen/Qwen2.5-7B-Instruct
|
||||
- API Key: define it as a variable for now, I will fill it later
|
||||
|
||||
Functions:
|
||||
- Write a generateRecipe(ingredients: [String]) method
|
||||
- The System Prompt must strictly require the model to return pure JSON only
|
||||
- JSON fields should include: dishName, ingredients, steps
|
||||
|
||||
Also define a RecipeModel struct for parsing the returned data.
|
||||
```
|
||||
|
||||
After the code is generated, fill in your own Key inside `APIService.swift`.
|
||||
|
||||
### 4.5 Stage Five: Core Data Local Storage
|
||||
|
||||
To let the app remember the recipes it has generated, we need to bring in local data storage. This stage is divided into two steps.
|
||||
|
||||
**Step 1: manually configure Core Data in Xcode**
|
||||
|
||||
1. Open `FridgeChef.xcdatamodeld`
|
||||
2. Create a new Entity named `RecipeEntity`
|
||||
|
||||

|
||||
|
||||
3. Add the following attributes:
|
||||
1. `id`: **UUID**
|
||||
2. `name`: **String**
|
||||
3. `cookTime`: **String**
|
||||
4. `difficulty`: **String**
|
||||
5. `desc`: **String**
|
||||
6. `timestamp`: **Date**
|
||||
7. `colorIndex`: **Integer 16**
|
||||
|
||||

|
||||
|
||||
**Step 2: let AI write the logic code**
|
||||
|
||||
📋 **Prompt to copy:**
|
||||
|
||||
```text
|
||||
I have finished configuring the Core Data Entity.
|
||||
|
||||
Entity: RecipeEntity
|
||||
Attributes: id, name, difficulty, timestamp, colorIndex, cookTime, desc
|
||||
|
||||
Please complete the following tasks:
|
||||
1. Save data into Core Data after recipe generation succeeds
|
||||
2. Use FetchRequest on the home page to read historical records and display them in reverse chronological order
|
||||
3. When the database is empty, show a friendly empty-state message
|
||||
```
|
||||
|
||||
### 4.6 Stage Six: Generate an App Icon
|
||||
|
||||
The final step is to prepare a proper icon for the app. Here we use **Lovart** to generate the icon asset: [https://www.lovart.ai/zh](https://www.lovart.ai/zh)
|
||||
|
||||

|
||||
|
||||
📋 **Prompt to copy into Lovart:**
|
||||
|
||||
```text
|
||||
Subject: A cute anthropomorphic fridge character with a happy face
|
||||
Style: Minimalistic App Icon, Neo-pop style, thick black outlines, vector art
|
||||
Colors: Acid green (#CCFF00) and deep blue
|
||||
Background: Solid cream color
|
||||
Negative Prompt: Text, realistic details, 3D render, complex background
|
||||
```
|
||||
|
||||
After generation, crop the image to 1024x1024 and drag it into `Assets.xcassets` -> `AppIcon` in Xcode.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
Run the app again, and you will now see a complete, recognizable, real iOS application.
|
||||
|
||||

|
||||
|
||||
### 4.7 Stage Seven: Advanced Experience Upgrade
|
||||
|
||||
Once the functionality is stable, if you want to further improve the visual style, you only need to describe the effect you want to AI, let it generate a new design proposal, and then migrate the confirmed result into SwiftUI.
|
||||
|
||||
📋 Reference Prompt:
|
||||
|
||||
```text
|
||||
The app's functionality is already complete, but I want to try a more visually impactful UI style.
|
||||
Please first generate a new design draft in HTML + Tailwind CSS for me, with the file name design_v2.html.
|
||||
|
||||
Design style: Neo-Pop (dopamine style)
|
||||
Color requirements:
|
||||
Use Deep Royal Blue as the full-screen background
|
||||
Use Acid Green (#CCFF00) as the accent color
|
||||
|
||||
Visual feel:
|
||||
All cards should use a 3px thick black border
|
||||
Use a hard shadow without transparency blur, shifted down-right
|
||||
|
||||
Layout requirements:
|
||||
Keep the home page structure unchanged
|
||||
Use pill-shaped buttons and input boxes
|
||||
|
||||
Please generate the full code so I can preview it in a browser.
|
||||
```
|
||||
|
||||
After it is generated, open this HTML file in a browser.
|
||||
|
||||

|
||||
|
||||
Once the HTML version is finalized, you can begin modifying the iOS project.
|
||||
|
||||
📋 Reference Prompt:
|
||||
|
||||
```text
|
||||
[design_v2.html uploaded]
|
||||
Please analyze the visual style of this HTML and migrate it into the current iOS project.
|
||||
|
||||
Task requirements:
|
||||
Create a new NeoPopStyle.swift file
|
||||
Encapsulate a neoPopBlue() style modifier
|
||||
|
||||
The modifier needs to include:
|
||||
- rounded corners
|
||||
- thick black border
|
||||
- opaque hard shadow
|
||||
|
||||
Refactor HomeView:
|
||||
- change the background to Deep Royal Blue
|
||||
- use Acid Green for the primary button
|
||||
- use white background for historical record cards
|
||||
- make sure text remains clear and readable on the dark background
|
||||
|
||||
Please provide the full modified code.
|
||||
```
|
||||
|
||||
Click Run in Xcode again. If everything works, you should see:
|
||||
|
||||
- the functionality is exactly the same as before
|
||||
- the visual style has changed significantly
|
||||
- the overall app quality feels noticeably upgraded
|
||||
|
||||

|
||||
|
||||
## Chapter 5: Running, Debugging, and Error Handling
|
||||
|
||||
In the previous chapter, you completed the core functionality and successfully ran the app in the simulator.
|
||||
But for an iOS app, true completion is not just "compiles successfully" - it is **stable operation, and knowing how to handle problems when they appear**.
|
||||
|
||||
### 5.1 Run the App in Xcode
|
||||
|
||||
First, make sure the project can run correctly in Xcode.
|
||||
|
||||
In the top-left of Xcode, select the run device and keep the default iPhone simulator. Click the **Run** button to compile and run. If everything is normal, the app will launch in the simulator and display the interface built in Chapter 4.
|
||||
|
||||
### 5.2 Run the App on a Real Device
|
||||
|
||||
Connect your iPhone to the Mac using a cable.
|
||||
|
||||

|
||||
|
||||
When connecting for the first time, the phone will show **Trust This Computer?** Tap **Trust** and enter the unlock passcode.
|
||||
|
||||

|
||||
|
||||
In Xcode's device list, select your iPhone, then click **Run** again.
|
||||
|
||||
At this point, you should be able to see the **FridgeChef** icon on your phone's home screen, and open and use it normally.
|
||||
|
||||

|
||||
|
||||
This step marks the completion of one full iOS development closed loop.
|
||||
|
||||
### 5.3 Where iOS Development Errors Usually Come From
|
||||
|
||||
In real development, **encountering errors is normal**, not an exception.
|
||||
|
||||
Common issues usually come from these categories:
|
||||
|
||||
1. **Compilation errors**
|
||||
Swift syntax, type mismatches, missing parameters, etc. Xcode will directly highlight them in red.
|
||||
2. **Runtime errors**
|
||||
The app compiles, but crashes during execution - for example, array out of bounds or force-unwrapping a nil value.
|
||||
3. **Permission or configuration errors**
|
||||
Network requests blocked by the system, missing Info.plist configuration, signing issues, etc.
|
||||
4. **Logic errors**
|
||||
The app does not crash, but the behavior is wrong - for example, buttons not responding or data not refreshing.
|
||||
|
||||

|
||||
|
||||
When any error appears, you only need to **copy the full error message exactly as it is into Trae's chat box.** With awareness of the project context, Trae can help you do the debugging.
|
||||
|
||||
### 5.4 Common Real-device Debugging Errors and Solutions
|
||||
|
||||
Errors during real-device debugging are very common. These problems are usually not caused by code itself, but by device trust, security rules, or signing configuration. If the app cannot run on your iPhone smoothly, you can check this section first.
|
||||
|
||||
#### 1. Signing and registration problems
|
||||
|
||||
**Common symptoms:**
|
||||
|
||||
- Xcode shows red errors like
|
||||
`"Communication with Apple failed"`
|
||||
or
|
||||
`"No profiles for 'com.xxx.xxx' were found"`
|
||||
- Or it says
|
||||
`"Your team has no devices which are compatible"`
|
||||
|
||||
**Cause:**
|
||||
|
||||
- The Bundle Identifier is not unique or valid
|
||||
- The current iPhone has not yet been registered under your Apple ID for development
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. **Modify the Bundle Identifier**
|
||||
In Xcode project settings, change the Bundle Identifier to something more unique, such as:
|
||||
`com.yourname.FridgeChef`
|
||||
2. **Let Xcode auto-register the device**
|
||||
In the error prompt, click `Try Again` or `Register Device`, and let Xcode complete the device registration and certificate configuration automatically.
|
||||
|
||||
#### 2. Device pairing and connection problems
|
||||
|
||||
**Common symptoms:**
|
||||
|
||||
- Xcode shows
|
||||
`"Device is not available because pairing is in progress"`
|
||||
- Or it says
|
||||
`"Device Locked"`
|
||||
- Or you already tapped Trust, but Xcode still remains stuck
|
||||
|
||||

|
||||
|
||||
**Cause:**
|
||||
|
||||
- The iPhone is still locked
|
||||
- The pairing process has not fully completed
|
||||
- Xcode has not refreshed the connection state
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Unlock the phone
|
||||
Make sure the iPhone is unlocked and stays on the home screen.
|
||||
2. Finish the trust process
|
||||
When the phone pops up **Trust This Computer?**, tap **Trust** and **enter the lock-screen passcode**.
|
||||
3. Refresh the connection state
|
||||
If it is still stuck, unplug the cable, wait 2-3 seconds, and reconnect. If necessary, restart Xcode and try again.
|
||||
|
||||
#### 3. The app installs but cannot open
|
||||
|
||||
**Common symptom:**
|
||||
|
||||
- The app icon already appears on the iPhone home screen
|
||||
- The system shows
|
||||
**Untrusted Developer**
|
||||
|
||||

|
||||
|
||||
**Cause:**
|
||||
|
||||
This is an iOS security mechanism. Debug apps installed with a personal Apple ID require manual trust authorization.
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Open **Settings**
|
||||
2. Enter **General**
|
||||
3. Tap **VPN & Device Management**
|
||||
4. Under **Developer App**, find your Apple ID
|
||||
5. Tap **Trust**, then confirm again
|
||||
|
||||

|
||||
|
||||
After that, return to the home screen and tap the app again. It should now run normally.
|
||||
|
||||
## Chapter 6: If You Want to Publish the App to the App Store
|
||||
|
||||
In this tutorial, what we mainly completed is the full closed loop for a **personal development and debugging version of an app**: from creating the project, implementing functions, and debugging, all the way to successfully installing and using it on a real device.
|
||||
|
||||
If you want to go further and formally publish the app to the **Apple App Store** so that all users can download and use it, then you need to enter a more formal release process. Since that process involves a paid developer account, review rules, and compliance requirements, and is not the main practical focus of this tutorial, the following content is only provided as an **overall reference and roadmap**.
|
||||
|
||||

|
||||
|
||||
> The following content references Apple's official review requirements and public experience discussions (including original Zhihu sharing). Links are listed below. If any link becomes unavailable, you can search by title or keyword to find the original source.
|
||||
|
||||
### 6.1 Apple Developer Program
|
||||
|
||||
To publish an app to the App Store, you must join Apple's paid developer program:
|
||||
|
||||
- **Apple Developer Program** (USD $99 per year)
|
||||
- Official site: [https://developer.apple.com/](https://developer.apple.com/)
|
||||
|
||||
After joining, you can use **App Store Connect** to create the app entry, manage versions, and publish formally.
|
||||
|
||||
### 6.2 App Store Connect: Create the App Entry
|
||||
|
||||
In App Store Connect, you need to create a complete app record, including but not limited to:
|
||||
|
||||
1. App name and Bundle ID
|
||||
2. Description, keywords, and privacy policy link
|
||||
3. App icon, screenshots, and preview materials
|
||||
4. Pricing and distribution region settings
|
||||
|
||||
All this information must be completed before submission can proceed.
|
||||
|
||||
### 6.3 Build and Submit for Review
|
||||
|
||||
After the metadata is ready, you need to:
|
||||
|
||||
1. Use the paid developer account in Xcode to sign a Release build
|
||||
2. Build and upload the formal version
|
||||
3. Submit it for review in App Store Connect
|
||||
|
||||
After submission, the app enters Apple's review queue. The review time is typically 1-3 days, depending on the case.
|
||||
|
||||
### 6.4 Review Rules and Common Reasons for Rejection
|
||||
|
||||
Apple mainly reviews apps from the following aspects:
|
||||
|
||||
- functionality and stability
|
||||
- privacy and data compliance
|
||||
- consistency between metadata and actual functionality
|
||||
- whether there is infringement or misleading behavior
|
||||
|
||||
If the app does not meet the requirements, the submission will be rejected and Apple will provide a specific reason. The developer then needs to modify the app and resubmit.
|
||||
|
||||
### 6.5 What to Do After Rejection
|
||||
|
||||
If the app is rejected, you can:
|
||||
|
||||
- modify the code or description according to the feedback
|
||||
- resubmit the version
|
||||
- communicate with the review team through App Store Connect
|
||||
|
||||
This is a very common part of the publishing process and does not mean the project has failed.
|
||||
|
||||
### Reference sources
|
||||
|
||||
The following content references Apple's official documentation and public experience sharing:
|
||||
|
||||
- App Store Review Guidelines (Apple official)
|
||||
[https://developer.apple.com/app-store/review/guidelines/](https://developer.apple.com/app-store/review/guidelines/)
|
||||
- Official guide to submitting for review
|
||||
[https://developer.apple.com/cn/help/app-store-connect/manage-submissions-to-app-review/submit-for-review](https://developer.apple.com/cn/help/app-store-connect/manage-submissions-to-app-review/submit-for-review)
|
||||
- Full illustrated guide to iOS App Store publishing and review pitfalls (Zhihu)
|
||||
[https://zhuanlan.zhihu.com/p/146128612](https://zhuanlan.zhihu.com/p/146128612)
|
||||
|
||||
## Chapter 7: Summary
|
||||
|
||||

|
||||
|
||||
Congrats! At this point, you have personally walked through the complete iOS app development process from 0 to 1. From setting up the environment and running the project to gradually completing the interface, functionality, data, and real-device testing, all the key stages have been completed smoothly. More importantly, you did not get here by memorizing Swift syntax - you handed most of the implementation to AI. No matter what your background is, every attempt like this makes you more fluent, and you will realize that iOS development is not as difficult as it once seemed. Even if you could not write a single line of code before, you can still build your own app.
|
||||
|
||||
Looking back, the whole process is not actually that complicated: decide what you want to build, use HTML to test the interface quickly, convert it into SwiftUI, connect the API and local data, and then run through debugging once. Based on this, in the future you can also casually build a personal alarm clock, a minimal todo list, or even a chatbot that speaks in the tone of your favorite celebrity.
|
||||
|
||||
This is exactly the most important thing that this tutorial - and easy-vibe - wants to teach you. I am looking forward to the newest creations from all of you future vibe coding masters, and to the day I get dazzled by your work.
|
||||
@@ -0,0 +1,681 @@
|
||||
# How to Build a Local PWA App: Turn a Website into a "Real App"
|
||||
|
||||
# 1 What PWA and PWA Development Are
|
||||
|
||||
In this tutorial, we will complete a full closed loop: **from an ordinary web project to a "real app" that can be installed on a desktop and a phone home screen and still works when offline.** You will personally turn a React app into a PWA, deploy it online, and install it on your phone for testing.
|
||||
|
||||
What we are going to build is a **Tomato Farm** app - a PWA that perfectly combines the Pomodoro technique with a farming game. You earn points through 25 minutes of focused work, then use those points to buy seeds and plant crops. As your level increases, you unlock more farmland and better seeds. Most importantly, it keeps working even without internet, and all data is stored locally.
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
- A computer (Windows or Mac)
|
||||
- A Node.js environment (version 18.0 or above)
|
||||
- Your AI coding assistant (Cursor / Trae / Claude Code, etc.)
|
||||
- A phone (for testing mobile installation)
|
||||
|
||||
## 1.1 Definition of PWA
|
||||
|
||||
**PWA (Progressive Web App)** is a special kind of website. Through **Service Worker** technology, it gains the ability to "cache and take over itself."
|
||||
|
||||
### Why ordinary websites cannot work offline, but PWAs can
|
||||
|
||||
An ordinary website needs to download HTML, CSS, and JS files from the server every time it opens, so if the network is down, it simply cannot load. A PWA, on the other hand, uses a **Service Worker** (a JS script running in the browser background) to cache these files locally on the first visit. After that, even if the network is disconnected, the Service Worker can read files directly from local cache and display the page normally.
|
||||
|
||||
**A simple analogy**: an ordinary website is like borrowing a book from a library every time (you must have internet), while a PWA is like buying the book and putting it on your own bookshelf (after the first download, you can still read it offline).
|
||||
|
||||
### PWA vs Ordinary Website vs Native App
|
||||
|
||||
| Feature | Ordinary Website | PWA | Native App |
|
||||
|------|---------|-----|---------|
|
||||
| **Installation** | Not needed | Optional (add to home screen) | Must download from app store |
|
||||
| **Offline use** | ❌ No | ✅ Yes (after caching) | ✅ Yes |
|
||||
| **Update method** | Auto refresh | Auto / background update | Manual user update |
|
||||
| **Size** | None | A few hundred KB to a few MB | Tens of MB or more |
|
||||
| **Development cost** | Low | Low (one codebase) | High (separate iOS / Android) |
|
||||
|
||||
**One-sentence summary**: a PWA is "a webpage that can store its own files" - it has the lightness of a website (no installation required, auto-updating) and the experience of a native app (offline support, installable to desktop/home screen).
|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.2 Why Choose PWA?
|
||||
|
||||
In the Vibe Coding era, PWA is one of the most cost-effective "cross-platform solutions":
|
||||
|
||||
| Comparison Dimension | Native App | PWA |
|
||||
|---------|---------|-----|
|
||||
| Development cost | Must develop iOS / Android / desktop separately | One codebase for all platforms |
|
||||
| Installation | Must go to app store | Install directly in browser, instant |
|
||||
| Update method | Users must update manually | Auto updates, invisible to user |
|
||||
| Package size | Often tens of MB | Usually only a few hundred KB |
|
||||
| Offline support | Built in naturally | Supported through Service Worker |
|
||||
| Best scenarios | Deep hardware access needed (AR / Bluetooth, etc.) | Content display, tools, lightweight apps |
|
||||
|
||||
**One-sentence summary**: if your app does not need deep hardware access (such as camera-based AR or Bluetooth), PWA is almost always the easiest choice.
|
||||
|
||||
## 1.3 Tutorial Roadmap
|
||||
|
||||
To make the learning process less boring, this tutorial revolves around a fun and practical case - **Tomato Farm**. It is a Pomodoro farming game that combines focused work with gamified rewards. Together with the Vibe Coding mode of AI coding assistants, we will break the process from zero to phone installation into a reusable route:
|
||||
|
||||
1. **Build understanding and environment**: understand what PWA is, install Node.js and an AI coding assistant, and make sure the toolchain is smooth.
|
||||
2. **Build the project skeleton**: create a React + TypeScript project that can run locally.
|
||||
3. **AI iterative development**: through conversation with AI, build Pomodoro countdown, farming system, level system, SVG crop rendering, and more.
|
||||
4. **PWA configuration and offline testing**: add Service Worker and Manifest, then verify offline support.
|
||||
5. **Deployment and phone installation**: deploy to Vercel to get an HTTPS URL, then install and use it on a phone.
|
||||
|
||||
This section only gives the big picture, without expanding the exact commands. For now, just remember the main line: **Environment setup -> Skeleton building -> AI description and generation -> PWA configuration -> Deployment delivery**. In the next chapters, we will walk through each step with you.
|
||||
|
||||
# 2 Development Environment Setup
|
||||
|
||||
## 2.1 Tools Used in This Tutorial
|
||||
|
||||
During the whole development process we use three tools together, and they take the roles of "design," "construction," and "acceptance."
|
||||
|
||||
- **AI coding assistant (Cursor / Trae / Claude Code)**: this is your **AI coding partner**. In Vibe Coding mode, we no longer need to write code line by line. Instead, we mainly tell AI in natural language what functionality we want, and it handles code generation and modification.
|
||||
- **Node.js + Vite**: these are the **project build factory**. Node.js provides the JavaScript runtime, and Vite is a next-generation frontend build tool with extremely fast speed, especially suitable for building PWAs.
|
||||
- **A phone**: this acts as the **test device** to verify the running result. You can directly access the deployed PWA in the browser on your phone and test the real installation and offline functionality.
|
||||
|
||||
## 2.2 Install Node.js
|
||||
|
||||
Node.js is the basic environment for PWA development. Visit the official website [https://nodejs.org](https://nodejs.org) and download the **LTS (Long Term Support)** version (this tutorial is based on Node.js 18.x or above).
|
||||
|
||||
After download, install it like ordinary software by double-clicking the installer and keeping default options.
|
||||
|
||||
After installation, open the terminal (CMD / PowerShell on Windows, Terminal on Mac) and run:
|
||||
|
||||
```bash
|
||||
node --version
|
||||
npm --version
|
||||
```
|
||||
|
||||
If you see version outputs such as `v18.17.0` and `9.6.7`, it means installation is successful.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 2.3 Install the AI Coding Assistant
|
||||
|
||||
The AI coding assistant is the main battlefield of **Vibe Coding**. You can simply understand it as an **"editor with a super AI built in."**
|
||||
|
||||
**Recommended choices:**
|
||||
|
||||
- **Trae**: visit [https://www.trae.cn](https://www.trae.cn) and download the matching version for your OS
|
||||
- **Cursor**: visit [https://cursor.sh](https://cursor.sh) and install it
|
||||
- **Claude Code**: if you are already using Claude, you can use Claude Code directly
|
||||
|
||||
The installation process is very simple, just like installing normal software. After preparing this tool, in later practice we no longer need to stare at boring code windows. Instead, we will open the project here and use natural language in the chat box to ask AI to write code and fix bugs.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 2.4 Create a New Project
|
||||
|
||||
Open your AI coding assistant and enter the following Prompt in the chat box:
|
||||
|
||||
```text
|
||||
Please help me create a React project named tomato-farm-pwa for building a Tomato Farm app.
|
||||
It needs to support TypeScript, and also include PWA functionality (the kind that can be installed to a phone home screen).
|
||||
```
|
||||
|
||||
AI will automatically perform the following steps:
|
||||
|
||||
**Step 1: Create the project**
|
||||
|
||||
```bash
|
||||
npm create vite@latest tomato-farm-pwa -- --template react-ts
|
||||
```
|
||||
|
||||
**Step 2: Enter the project and install dependencies**
|
||||
|
||||
```bash
|
||||
cd tomato-farm-pwa
|
||||
npm install
|
||||
```
|
||||
|
||||
**Step 3: Install the PWA plugin**
|
||||
|
||||
```bash
|
||||
npm install vite-plugin-pwa -D
|
||||
```
|
||||
|
||||
After AI finishes, your project structure will roughly look like this:
|
||||
|
||||
```text
|
||||
tomato-farm-pwa/
|
||||
├── public/ # Static assets (icons, SVG materials go here)
|
||||
├── src/
|
||||
│ ├── App.tsx # Main component
|
||||
│ ├── main.tsx # Entry file
|
||||
│ └── App.css # Styles
|
||||
├── index.html # HTML entry
|
||||
├── vite.config.ts # Vite config (PWA config goes here)
|
||||
├── package.json
|
||||
└── tsconfig.json
|
||||
```
|
||||
|
||||
## 2.5 Understand the Project Structure
|
||||
|
||||
After the project is created, we need to understand the role of several key files:
|
||||
|
||||
| File / Directory | Purpose |
|
||||
|----------|---------|
|
||||
| `src/App.tsx` | Main application component, where the core page logic is written |
|
||||
| `src/main.tsx` | Application entry file, responsible for mounting the React app |
|
||||
| `vite.config.ts` | Vite configuration file, where the core PWA config is written |
|
||||
| `public/` | Static asset directory, where PWA icons and SVG materials go |
|
||||
| `index.html` | HTML entry file, usually does not need modification |
|
||||
|
||||
As beginners, we mainly need to care about three parts:
|
||||
|
||||
- `App.tsx`: controls program behavior and decides "what appears on screen"
|
||||
- `vite.config.ts`: configures PWA behavior and decides "how the app is installed and cached"
|
||||
- `public/`: stores the app icons and assets
|
||||
|
||||
## 2.6 Prepare App Icons
|
||||
|
||||
PWA needs icons before it can be installed. At minimum, we need two PNG images in **192x192** and **512x512** sizes.
|
||||
|
||||
You can ask AI to generate them:
|
||||
|
||||
```text
|
||||
Please help me generate two app icons with sizes 192x192 and 512x512.
|
||||
Use a green gradient background and draw a red tomato in the middle. Save them in the public folder.
|
||||
```
|
||||
|
||||
Or you can also create your own icons with any design tool (Figma, Canva) and put them into the `public/` directory.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 2.7 Configure `vite-plugin-pwa`
|
||||
|
||||
This is the most critical step. Open `vite.config.ts` and ask AI to configure the PWA plugin:
|
||||
|
||||
```text
|
||||
Please help me change vite.config.ts into a PWA configuration so the webpage can be installed to a phone home screen:
|
||||
- The app name is "Tomato Farm", with a green theme
|
||||
- Use icon-192.png and icon-512.png from the public directory as icons
|
||||
- Enable automatic updates
|
||||
- Cache all js, css, html, and image files so the app can work offline
|
||||
```
|
||||
|
||||
AI will generate a configuration similar to this:
|
||||
|
||||
```typescript
|
||||
import { defineConfig } from 'vite'
|
||||
import react from '@vitejs/plugin-react'
|
||||
import { VitePWA } from 'vite-plugin-pwa'
|
||||
|
||||
export default defineConfig({
|
||||
plugins: [
|
||||
react(),
|
||||
VitePWA({
|
||||
registerType: 'autoUpdate',
|
||||
manifest: {
|
||||
name: 'Tomato Farm',
|
||||
short_name: 'Tomato Farm',
|
||||
description: 'Focus, plant, and grow',
|
||||
theme_color: '#4CAF50',
|
||||
background_color: '#ffffff',
|
||||
display: 'standalone',
|
||||
icons: [
|
||||
{
|
||||
src: '/icon-192.png',
|
||||
sizes: '192x192',
|
||||
type: 'image/png'
|
||||
},
|
||||
{
|
||||
src: '/icon-512.png',
|
||||
sizes: '512x512',
|
||||
type: 'image/png'
|
||||
}
|
||||
]
|
||||
},
|
||||
workbox: {
|
||||
globPatterns: ['**/*.{js,css,html,ico,png,svg}']
|
||||
}
|
||||
})
|
||||
]
|
||||
})
|
||||
```
|
||||
|
||||
**Key configuration explanation:**
|
||||
|
||||
* `registerType: 'autoUpdate'`: when you publish a new version, the app will update automatically the next time users open it, without manual operation.
|
||||
* `display: 'standalone'`: after installation, it runs in its own window, without browser address bar, and feels like a native app.
|
||||
* `workbox.globPatterns`: tells the Service Worker which file types should be cached and still accessible offline.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
# 3 Build the Tomato Farm PWA
|
||||
|
||||
In the previous two chapters, we already understood what a PWA is and completed the environment setup. From this section onward, we stop talking only in theory and move into hands-on practice. We will use Vibe Coding mode to build a fun and practical app from scratch - **Tomato Farm**. It perfectly combines the Pomodoro technique with gamified incentives and covers the core elements of PWA development: **UI interaction (Pomodoro timer), data storage (points and crops), and offline capability (Service Worker caching).**
|
||||
|
||||
Now, let us send the first instruction to AI.
|
||||
|
||||
## 3.1 The First "Master Prompt": From Zero to One
|
||||
|
||||
In Vibe Coding mode, we do not need to follow the traditional approach of first creating layout files and then writing logic code. What we need to do is **describe the requirements clearly in one shot and let AI generate the first runnable version**.
|
||||
|
||||
Open the project directory we just created in your AI coding assistant, and enter the following Prompt:
|
||||
|
||||
```text
|
||||
Please help me write the main page for the Tomato Farm app, with the following functions:
|
||||
|
||||
**Pomodoro Timer**
|
||||
- A 25-minute countdown timer with start, pause, and reset
|
||||
- Show remaining time and a progress bar
|
||||
- Give the user 10 points after completing one focus session
|
||||
|
||||
**Farming System**
|
||||
- 3 plots of farmland, but initially only the first one is available; the later ones are unlocked after leveling up
|
||||
- A shop to buy seeds: carrot costs 5 points, tomato 10 points, corn 15 points
|
||||
- After buying seeds and planting them, crops slowly grow, and when mature they can be harvested for points
|
||||
|
||||
**Level System**
|
||||
- Level by total points: 0-99 points = Beginner Farmer, 100-300 = Skilled Farmer, above 300 = Farm Master
|
||||
- Unlock new land and better seeds after leveling up
|
||||
|
||||
**UI Design**
|
||||
- Top shows level, points, and upgrade progress bar
|
||||
- Middle shows the Pomodoro countdown
|
||||
- Below is the farmland grid
|
||||
- Bottom has the shop button
|
||||
- Use a green theme and make it look fresh and cute
|
||||
- Must adapt to phone screens
|
||||
|
||||
**Data Saving**
|
||||
- All data (points, level, farmland state) must be saved, and refreshing the page should not lose it
|
||||
```
|
||||
|
||||
After sending it, you will see AI start reasoning and analyzing your project structure. A few seconds later, it will directly generate the complete code for `App.tsx`.
|
||||
|
||||
1. From its response, we can see its reasoning logic and interaction logic
|
||||
2. We can directly see which code it changed
|
||||
3. If we are not satisfied, we can roll back to the previous version
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 3.2 Run and Preview (Local Development Server)
|
||||
|
||||
Now AI has completed the first round of development, but remember: what we see in the coding assistant is still just code "blueprints," not a truly interactive app. We need to start a local development server so we can actually run the code and view the real effect.
|
||||
|
||||
Run this in the terminal of your AI coding assistant:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
After a few seconds, the terminal will show output like this:
|
||||
|
||||
```text
|
||||
VITE v5.0.0 ready in 300 ms
|
||||
|
||||
-> Local: http://localhost:5173/
|
||||
-> Network: use --host to expose
|
||||
-> press h + enter to show help
|
||||
```
|
||||
|
||||
Open `http://localhost:5173/` in your browser, and you should see:
|
||||
|
||||
- level, points, and a progress bar at the top
|
||||
- a Pomodoro countdown in the middle
|
||||
- farmland area below
|
||||
- a shop button at the bottom
|
||||
|
||||
Try clicking the "Start Focus" button and see if the countdown works properly. Click on a farmland tile and see if you can buy seeds and plant them. This is the first version of your PWA app.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 3.3 Optimization Iteration (Add SVG Crops and Animation)
|
||||
|
||||
At this point, our app already has a basic shape: Pomodoro timer, farming system, and leveling system. But it may still look rough, with crops perhaps shown only as text or simple blocks. Next, we will add beautiful SVG crops and growth animation to make the Tomato Farm come alive.
|
||||
|
||||
**This is exactly where Vibe Coding becomes so attractive.** In traditional development, drawing SVG graphics and building complex growth animations can be a nightmare for beginners. You not only need to handle SVG path drawing, but also calculate animation curves. In Vibe Coding mode, you do not need to worry about those low-level details. You just tell AI like a director: "Give the crops nicer SVG graphics and make them grow with animation," and the complex code appears almost instantly.
|
||||
|
||||
**Step 1: Prepare SVG crop assets**
|
||||
|
||||
You can ask AI to draw SVG directly in code, or prepare SVG files and put them under `public/`. In this tutorial, we recommend letting AI generate SVG code directly because it is more flexible.
|
||||
|
||||
**Step 2: Send an iteration instruction**
|
||||
|
||||
Return to the AI coding assistant and enter the following Prompt:
|
||||
|
||||
```text
|
||||
Please make the crops look better and add growth animation:
|
||||
|
||||
**Crop graphics**
|
||||
- Carrot: orange body with green leaves
|
||||
- Tomato: red round shape with little green leaves
|
||||
- Corn: yellow corn cob with green outer leaves
|
||||
Just use simple shapes
|
||||
|
||||
**Growth animation**
|
||||
- When first planted, it starts as a small sprout and gradually grows to maturity
|
||||
- Show 3 stages
|
||||
|
||||
**Harvest effect**
|
||||
- When clicking a mature crop, play a simple harvest animation
|
||||
- Show how many points were gained
|
||||
|
||||
**Overall polish**
|
||||
- Farmland tiles should have borders and background color
|
||||
- Crops should appear centered in the tile
|
||||
- Overall style should feel a little cuter
|
||||
```
|
||||
|
||||
AI will modify the code again and handle the SVG rendering and animation logic. After it finishes, refresh the browser, and you should see better crop graphics and smooth growth animations.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 3.4 Add Sound Effects and Notifications (Optional)
|
||||
|
||||
If you want Tomato Farm to feel more immersive, you can also add sound effects and notifications. This also only needs a simple Prompt:
|
||||
|
||||
```text
|
||||
Please add sound effects and notifications to Tomato Farm:
|
||||
|
||||
**Sound effects**
|
||||
- Play a "ding" when focus starts
|
||||
- Play a victory sound when focus is completed
|
||||
- Also add matching sound effects for planting and harvesting
|
||||
|
||||
**Notifications**
|
||||
- Show "Congratulations, you finished a focus session!" after a focus cycle ends
|
||||
- Show "Congratulations, you leveled up to XX!" when leveling up
|
||||
- Show "You unlocked a new farmland plot!" when new land is unlocked
|
||||
|
||||
You can implement this with simple audio files or the Web Audio API
|
||||
```
|
||||
|
||||
AI will help you add sound effects and notifications, making the Tomato Farm more lively and enjoyable.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
# 4 Experience the PWA Locally
|
||||
|
||||
## 4.1 Build and Preview
|
||||
|
||||
The PWA Service Worker only takes effect in production builds (it will not register in development mode). So we need to build first, then preview:
|
||||
|
||||
```text
|
||||
Please help me run these commands:
|
||||
1. npm run build (build production version)
|
||||
2. npm run preview (start local preview server)
|
||||
```
|
||||
|
||||
After the build finishes, all output files will be in the `dist/` directory, including the auto-generated `sw.js` (Service Worker) and `manifest.webmanifest`.
|
||||
|
||||
Once the preview server starts, open the address shown in the terminal (usually `http://localhost:4173`).
|
||||
|
||||
## 4.2 Install the PWA on Desktop
|
||||
|
||||
After opening the preview URL, you will notice that an **install icon** appears on the right side of the browser address bar (usually a small download arrow or "+" sign).
|
||||
|
||||
**Chrome / Edge installation steps:**
|
||||
|
||||
1. Click the install icon on the right side of the address bar
|
||||
2. Click **Install** in the popup dialog
|
||||
3. The PWA will open in a standalone window, and a shortcut will be created on your desktop / Start Menu / Dock
|
||||
|
||||
The installed PWA looks just like a native desktop app - no address bar, no tabs, with its own window and icon. Now you can open Tomato Farm anytime and begin your focus-and-farming journey.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
**macOS Safari installation steps:**
|
||||
|
||||
1. Open the PWA URL in Safari
|
||||
2. Click **File -> Add to Dock** from the menu bar
|
||||
3. The PWA icon will appear in the Dock
|
||||
|
||||
## 4.3 Test Offline Capability
|
||||
|
||||
This is the coolest part of PWA. Let us verify whether offline mode really works:
|
||||
|
||||
1. Make sure the PWA has been opened in the browser at least once (so the Service Worker can cache resources)
|
||||
2. **Disconnect the network** (turn off Wi-Fi or unplug the cable)
|
||||
3. Refresh the page - you will find that **Tomato Farm still loads normally!**
|
||||
4. Start a Pomodoro session, earn points when it finishes, then buy seeds and plant crops - all of this still works offline, and the data is still saved in `localStorage` as usual
|
||||
|
||||
You can also open Chrome DevTools (F12) -> Application -> Service Workers to inspect Service Worker status and cached resource lists.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 4.4 Data Persistence and Sync Options
|
||||
|
||||
Now your Tomato Farm can already run offline, and all data is saved in the browser's `localStorage`. But there is one key problem: **if the user switches devices or clears browser data, all farm data will be lost**. For serious production apps, we need to think about data persistence and cross-device synchronization.
|
||||
|
||||
### 4.4.1 Limitations of Local Storage
|
||||
|
||||
The `localStorage` we are currently using has several obvious limitations:
|
||||
|
||||
| Limitation | Description |
|
||||
|--------|------|
|
||||
| **Device-bound** | Data is only stored in the current browser on the current device; switching devices means losing it |
|
||||
| **Limited capacity** | Usually only 5-10MB of storage space |
|
||||
| **Easy to lose** | Clearing browser data or uninstalling the PWA causes data loss |
|
||||
| **Cannot sync** | Progress on phone cannot sync to desktop |
|
||||
|
||||
If your Tomato Farm is just a personal tool, this may not be a problem. But if you want users to invest long term and accumulate data, a more reliable solution is needed.
|
||||
|
||||
### 4.4.2 Option 1: Cloud Sync (Recommended)
|
||||
|
||||
The most reliable solution is synchronizing data to a cloud database. For PWAs, **Supabase** is an excellent choice - it provides a PostgreSQL database, real-time subscriptions, and authentication, and also offers a free tier.
|
||||
|
||||
**Implementation idea:**
|
||||
|
||||
1. **User login**: use email or social login to establish user identity
|
||||
2. **Automatic data sync**: every operation automatically saves to the cloud
|
||||
3. **Offline-first**: the app still works when offline, then syncs automatically when the network returns
|
||||
4. **Cross-device sync**: progress on phone is available immediately on desktop
|
||||
|
||||
**Prompt example:**
|
||||
|
||||
```text
|
||||
Please help me migrate Tomato Farm data storage from localStorage to Supabase cloud sync:
|
||||
|
||||
**Functional requirements**
|
||||
- Add user login (email + password or Google login)
|
||||
- Save user data (points, level, farmland state) to Supabase database
|
||||
- Still work offline, and automatically sync when the network recovers
|
||||
- Support multi-device sync, so crops planted on the phone can also be seen on desktop
|
||||
|
||||
**Tech stack**
|
||||
- Use @supabase/supabase-js client
|
||||
- Implement optimistic updates (update UI first, then sync to cloud)
|
||||
- Add a simple sync status indicator
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
|
||||
- Data will not be lost; users only need to log in again when switching devices
|
||||
- Free tier is enough for personal projects
|
||||
- Supports real-time subscriptions, giving good multi-device sync experience
|
||||
|
||||
**Cons:**
|
||||
|
||||
- Requires user registration/login, adding usage friction
|
||||
- Needs network connection to perform syncing
|
||||
|
||||
### 4.4.3 Option 2: Export / Import Backup
|
||||
|
||||
If you do not want to add a backend service, a simpler compromise is **manual backup and restore**.
|
||||
|
||||
**Implementation idea:**
|
||||
|
||||
1. **Export**: package farm data as a JSON file and let users download it
|
||||
2. **Import**: users can select a previously exported JSON file to restore data
|
||||
3. **Automatic reminder**: remind users to back up periodically
|
||||
|
||||
**Prompt example:**
|
||||
|
||||
```text
|
||||
Please add data backup functionality to Tomato Farm:
|
||||
|
||||
**Export**
|
||||
- Add an "Export Data" button on the settings page
|
||||
- Package all data in localStorage into a JSON file
|
||||
- Automatically download it to the user's device
|
||||
|
||||
**Import**
|
||||
- Add an "Import Data" button that accepts a JSON file
|
||||
- Validate file format before restoring
|
||||
- Show a warning before import because it overwrites current data
|
||||
|
||||
**Automatic reminders**
|
||||
- If the user has not backed up for over 7 days, show a friendly reminder
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
|
||||
- Simple to implement, no backend service required
|
||||
- Users fully control their own data
|
||||
- Can transfer across devices by sharing the exported file
|
||||
|
||||
**Cons:**
|
||||
|
||||
- Requires manual operation, so the experience is not smooth
|
||||
- If the user forgets to back up, data can still be lost
|
||||
|
||||
### 4.4.4 Option 3: Browser Extension Sync (For Chrome Users)
|
||||
|
||||
If your PWA mainly targets Chrome users, you can consider the **Chrome Storage Sync API** (`chrome.storage.sync`). This is a cross-device synced storage service provided by Chrome, where data automatically syncs with the user's Google account.
|
||||
|
||||
**Note:** this requires packaging the PWA as a Chrome extension as well, which is more suitable for developers with technical experience.
|
||||
|
||||
### 4.4.5 Recommended Choice Strategy
|
||||
|
||||
| Scenario | Recommended Solution |
|
||||
|------|----------|
|
||||
| Personal lightweight tool | `localStorage` only is enough |
|
||||
| Want to avoid data loss, but do not want too much complexity | Export / import backup |
|
||||
| Official product with better user experience | Supabase cloud sync |
|
||||
| Mainly for Chrome users | Chrome Storage Sync |
|
||||
|
||||
**For an app like Tomato Farm, my suggestion is:**
|
||||
|
||||
1. **MVP stage**: start with `localStorage` to verify the product idea quickly
|
||||
2. **Iteration stage**: add export / import backup so users have a data safety net
|
||||
3. **Mature stage**: integrate Supabase to achieve real cloud synchronization
|
||||
|
||||
Remember: **progressive enhancement** is the core philosophy of PWA. First make the app run, then gradually add more advanced capabilities.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
# 5 Deploy Online
|
||||
|
||||
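A PWA must be served over HTTPS in order to work correctly (`localhost` is the one exception, which is why the local preview in the previous chapter could be installed). The good news is that mainstream deployment platforms now provide free HTTPS automatically. We will use **Vercel** as an example (you could also use Netlify or GitHub Pages).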
|
||||
|
||||
## 5.1 Deploy to Vercel
|
||||
|
||||
**Step 1: Install the deployment tool**
|
||||
|
||||
```text
|
||||
Please help me install Vercel's deployment tool
|
||||
```
|
||||
|
||||
**Step 2: Deploy the project**
|
||||
|
||||
```text
|
||||
Please help me deploy this project to Vercel. The project name is tomato-farm-pwa
|
||||
```
|
||||
|
||||
AI will handle the deployment steps automatically. You only need to:
|
||||
- choose your account
|
||||
- confirm creating a new project
|
||||
- keep the other options at default
|
||||
|
||||
After waiting a few dozen seconds, Vercel will automatically build and deploy your project. When done, you will get an HTTPS URL like `https://tomato-farm-pwa.vercel.app`.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
**Step 3: Verify the PWA**
|
||||
|
||||
Open the deployed URL in your browser, and you should see:
|
||||
|
||||
1. an install icon appear on the right side of the address bar
|
||||
2. in DevTools -> Application -> Manifest, your configured app info such as the name "Tomato Farm"
|
||||
3. in the Service Workers tab, the Service Worker shown as activated
|
||||
|
||||
## 5.2 Deploy with GitHub Pages (Alternative)
|
||||
|
||||
If you prefer GitHub Pages, you need additional path configuration:
|
||||
|
||||
```text
|
||||
Please help me modify the config so the project can be deployed to GitHub Pages.
|
||||
My repository name is tomato-farm-pwa, so please adjust the path configuration accordingly.
|
||||
```
|
||||
|
||||
Then push the build output to the `gh-pages` branch of your GitHub repository.
|
||||
|
||||
# 6 Install the PWA on a Phone
|
||||
|
||||
This is the most exciting part - turning your Tomato Farm webpage into an "app" on your phone.
|
||||
|
||||
## 6.1 Install on Android
|
||||
|
||||
1. Open your deployed Tomato Farm PWA URL in the **Chrome browser** on your phone
|
||||
2. Chrome may automatically show an **"Add to Home screen"** prompt banner - just click it
|
||||
3. If it does not show automatically, tap the **three-dot menu** in the top-right corner -> **Install app** or **Add to Home screen**
|
||||
4. Confirm installation, and a Tomato Farm app icon will appear on your phone's home screen
|
||||
|
||||
Open it and you will notice it runs in full-screen mode, without the browser address bar or navigation buttons, looking almost exactly like a native app. Now you can start focusing and farming anytime.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
## 6.2 Install on iPhone
|
||||
|
||||
On iOS, the most reliable way to install a PWA is through the **Safari** browser (before iOS 16.4, Safari was the only browser that supported installation):
|
||||
|
||||
1. Open your deployed Tomato Farm PWA URL in **Safari**
|
||||
2. Tap the **Share** button at the bottom (square with an upward arrow)
|
||||
3. In the menu, choose **Add to Home Screen**
|
||||
4. Give the app a name and tap **Add**
|
||||
|
||||
Starting from iOS 26, all websites added to the home screen will open in standalone app mode by default, which is a major improvement.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
> **Known limitations on iOS:**
|
||||
> * Push notifications require iOS 16.4 or above, and the PWA must already be added to the home screen
|
||||
> * Background Sync is not supported
|
||||
> * Storage space is more limited than on Android
|
||||
|
||||
## 6.3 Audit Your PWA with Lighthouse
|
||||
|
||||
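Google provides a tool called **Lighthouse**, which can score your PWA. Open Chrome DevTools (F12) -> Lighthouse -> check "Progressive Web App" -> click "Analyze page load." (Recent Chrome versions no longer include the "Progressive Web App" category in Lighthouse; if you do not see it, use DevTools -> Application -> Manifest and Service Workers to check installability instead.)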
|
||||
|
||||
A qualified Tomato Farm PWA should get a full score in the PWA category. If not, Lighthouse will tell you the exact reasons and suggest fixes.
|
||||
|
||||
<!-- 0 -->
|
||||
|
||||
# 7 Final Notes
|
||||
|
||||
Congratulations! You have successfully built a Pomodoro farming PWA that can be installed on both desktop and mobile. Let us review what we did:
|
||||
|
||||
1. Created a Tomato Farm web app with Vite + React
|
||||
2. Added Service Worker and Manifest via `vite-plugin-pwa`
|
||||
3. Deployed it to Vercel to get an HTTPS URL
|
||||
4. Successfully installed it on both desktop and mobile, and tested offline capability
|
||||
|
||||
Now your Tomato Farm PWA can already achieve:
|
||||
* **Focus farming**: help users stay focused through the Pomodoro mechanism
|
||||
* **Gamified rewards**: use planting, leveling, and unlocking to motivate repeated use
|
||||
* **Offline usability**: even with no network, users can still focus, plant, and manage their farm
|
||||
* **Cross-platform installation**: develop once and install on multiple kinds of devices
|
||||
|
||||
The charm of PWA is its "progressiveness" - you do not need to make it perfect at the very beginning. First make the website installable and available offline, then gradually add advanced capabilities such as push notifications and background sync.
|
||||
|
||||
**Advanced directions:**
|
||||
|
||||
* **Push notifications**: use Push API + Notification API to remind users when a Pomodoro finishes, or when crops are ready to harvest
|
||||
* **Background sync**: use Background Sync API to sync farm data to the cloud after the network returns
|
||||
* **Smarter caching strategies**: use different Workbox strategies such as CacheFirst, NetworkFirst, and StaleWhileRevalidate for different kinds of assets
|
||||
* **Publish to app stores**: use [PWA Builder](https://www.pwabuilder.com/) to package the Tomato Farm PWA into an Android APK or Microsoft Store app
|
||||
* **Social features**: add a friend system so users can visit each other's farms and exchange crops
|
||||
|
||||
***One codebase, all platforms - this is the power of PWA. Focus, plant, and grow!***
|
||||
|
||||
# References
|
||||
|
||||
* [Vite PWA Official Docs](https://vite-pwa-org.netlify.app/guide/)
|
||||
* [Google PWA Development Guide](https://web.dev/progressive-web-apps/)
|
||||
* [MDN Web App Manifest Docs](https://developer.mozilla.org/en-US/docs/Web/Manifest)
|
||||
* [Workbox Caching Strategies Overview](https://developer.chrome.com/docs/workbox/caching-strategies-overview/)
|
||||
* [PWA Builder - Publish PWA to App Stores](https://www.pwabuilder.com/)
|
||||
@@ -0,0 +1,503 @@
|
||||
# How to Build a Browser AI Assistant Extension: Summarize Any Webpage in One Click
|
||||
|
||||
# Chapter 1: What Are Browser Extensions and Chrome Extension Development?
|
||||
|
||||
In this tutorial, we will complete a full closed loop: build an AI-driven Chrome browser extension from scratch. It can read the content of any webpage you are browsing, then use AI to generate a one-click summary. You will develop and debug the extension yourself, and learn how to publish it to the Chrome Web Store.
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
- Chrome browser (version 138+ recommended if you want to use built-in AI)
|
||||
- A code editor (VS Code / Cursor / Trae)
|
||||
- (Optional) An OpenAI or Claude API Key
|
||||
|
||||
## 1.1 What Is a Browser Extension?
|
||||
|
||||
You have definitely used browser extensions before: ad blockers, translation tools, password managers... They are like "extra gear" for your browser, giving you superpowers while browsing the web.
|
||||
|
||||
Imagine this: you open a 5,000-word technical blog post, click the extension button once, and a few seconds later a concise Chinese summary appears in the side panel. That is exactly what we are going to build.
|
||||
|
||||

|
||||
|
||||
<!--  -->
|
||||
|
||||
## 1.2 The Basic Architecture of a Chrome Extension
|
||||
|
||||
Chrome extensions (based on Manifest V3) consist of several core parts, each with its own role:
|
||||
|
||||
* **Manifest file (`manifest.json`)**: the extension's "ID card," declaring its name, permissions, entry files, and more.
|
||||
* **Service Worker (background script)**: the extension's "brain," handling events and calling APIs in the background. It does not run continuously, but starts when needed.
|
||||
* **Content Script**: the extension's "eyes," injected into webpages and able to read DOM content.
|
||||
* **Side Panel**: the extension's "face," showing UI on the right side of the browser where users see AI summary results.
|
||||
* **Options Page**: lets users configure API Key and related settings.
|
||||
|
||||
Their workflow looks like this:
|
||||
|
||||
```text
|
||||
User clicks the extension icon
|
||||
-> Side panel opens
|
||||
-> User clicks the "Summarize" button
|
||||
-> Side panel notifies the Service Worker
|
||||
-> Service Worker asks Content Script to read page text
|
||||
-> Content Script returns page content
|
||||
-> Service Worker sends content to AI API
|
||||
-> AI returns the summary
|
||||
-> Service Worker sends the summary back to the side panel for display
|
||||
```
|
||||
|
||||

|
||||
<!--  -->
|
||||
|
||||
## 1.3 Two AI Options: Cloud API vs Built-in Browser AI
|
||||
|
||||
Our extension has two ways to access AI capability:
|
||||
|
||||
**Option A: Call cloud AI APIs (OpenAI / Claude)**
|
||||
|
||||
* Pros: powerful model capability, supports all devices
|
||||
* Cons: needs an API Key, requires internet, has usage cost
|
||||
* Best for: high-quality summaries and handling more complex content
|
||||
|
||||
**Option B: Use Chrome built-in AI (Summarizer API)**
|
||||
|
||||
Starting from Chrome 138, Google built AI capabilities based on Gemini Nano directly into the browser. One of them is the **Summarizer API** - it runs entirely locally and, once the model has been downloaded on first use, requires no API Key, no internet, and is completely free.
|
||||
|
||||
* Pros: free, privacy-friendly, no API Key needed
|
||||
* Cons: requires Chrome 138+, better hardware (4GB+ VRAM or 16GB+ RAM), model capability is weaker than cloud AI
|
||||
* Best for: users who care about privacy, do not want to pay, and have sufficient hardware
|
||||
|
||||
**This tutorial will implement both options**, and you can choose based on your own situation.
|
||||
|
||||
## 1.4 Tutorial Roadmap
|
||||
|
||||
We will build a Chrome extension called **"AI Page Summarizer"** from scratch, following these steps:
|
||||
|
||||
1. **Build the extension skeleton**: create a Manifest V3 project structure and load it into Chrome
|
||||
2. **Implement the core feature**: Content Script reads the page + Service Worker calls AI API + side panel shows results
|
||||
3. **Integrate Chrome built-in AI**: use Summarizer API to provide free local summarization
|
||||
4. **Testing and debugging**: learn Chrome extension debugging techniques
|
||||
5. **Publish to Chrome Web Store**: package and submit for review
|
||||
|
||||
# Chapter 2: Build the Extension Skeleton
|
||||
|
||||
## 2.1 Create the Project Structure
|
||||
|
||||
Open your AI coding assistant (Cursor / Trae / Claude Code), create an empty folder named `ai-page-summarizer`, then enter the following in the chat box:
|
||||
|
||||
```text
|
||||
Please help me create a Chrome browser extension project using Manifest V3.
|
||||
The project name is ai-page-summarizer, and its function is to summarize webpage content with AI.
|
||||
Please create the following file structure:
|
||||
|
||||
ai-page-summarizer/
|
||||
├── manifest.json # MV3 manifest file
|
||||
├── background.js # Service Worker background script
|
||||
├── content.js # Content script (reads webpage text)
|
||||
├── sidepanel.html # Side panel HTML
|
||||
├── sidepanel.js # Side panel logic
|
||||
├── sidepanel.css # Side panel styling
|
||||
├── options.html # Settings page
|
||||
├── options.js # Settings page logic
|
||||
└── icons/ # Icons folder
|
||||
|
||||
Requirements for manifest.json:
|
||||
1. manifest_version: 3
|
||||
2. Permissions: storage, activeTab, scripting, sidePanel
|
||||
3. Use service_worker: "background.js" for background
|
||||
4. Configure side_panel with default path sidepanel.html
|
||||
5. Configure default icon and title for action
|
||||
```
|
||||
|
||||
AI will generate the full project skeleton for you. Let us look at what each file does.
|
||||
|
||||
## 2.2 `manifest.json`: The Extension's "ID Card"
|
||||
|
||||
This is the most important file in a Chrome extension. It tells the browser what the extension is, what permissions it needs, and which components it contains:
|
||||
|
||||
```json
|
||||
{
|
||||
"manifest_version": 3,
|
||||
"name": "AI Page Summarizer",
|
||||
"version": "1.0",
|
||||
"description": "Use AI to summarize any webpage in one click",
|
||||
"permissions": ["storage", "activeTab", "scripting", "sidePanel"],
|
||||
"background": {
|
||||
"service_worker": "background.js"
|
||||
},
|
||||
"action": {
|
||||
"default_title": "AI Page Summarizer",
|
||||
"default_icon": {
|
||||
"16": "icons/icon-16.png",
|
||||
"48": "icons/icon-48.png",
|
||||
"128": "icons/icon-128.png"
|
||||
}
|
||||
},
|
||||
"side_panel": {
|
||||
"default_path": "sidepanel.html"
|
||||
},
|
||||
"options_page": "options.html",
|
||||
"icons": {
|
||||
"16": "icons/icon-16.png",
|
||||
"48": "icons/icon-48.png",
|
||||
"128": "icons/icon-128.png"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Permission explanation:**
|
||||
|
||||
* `storage`: lets the extension store data such as the user's API Key
|
||||
* `activeTab`: lets the extension access the current tab the user is viewing (only after user interaction, so it is very safe)
|
||||
* `scripting`: lets the extension inject scripts into pages to read content
|
||||
* `sidePanel`: lets the extension use Chrome side panel API
|
||||
|
||||

|
||||
<!--  -->
|
||||
|
||||
## 2.3 Prepare Icons
|
||||
|
||||
Chrome extensions need icons in three sizes: 16x16, 48x48, and 128x128. You can ask AI to generate them:
|
||||
|
||||
```text
|
||||
Please help me generate three simple Chrome extension icons (16x16, 48x48, 128x128),
|
||||
with a rounded rectangle, gradient purple background, and a white AI lightning symbol in the center.
|
||||
Save them in the icons/ directory as icon-16.png, icon-48.png, and icon-128.png.
|
||||
```
|
||||
|
||||
## 2.4 Load the Extension into Chrome
|
||||
|
||||
Before writing code, let us first load this "empty shell" extension into Chrome, so every later change can be previewed immediately:
|
||||
|
||||
1. Open Chrome and enter `chrome://extensions/` in the address bar
|
||||
2. Turn on **Developer mode** in the top-right corner
|
||||
3. Click **Load unpacked**
|
||||
4. Select your `ai-page-summarizer` folder
|
||||
|
||||
You will see the extension appear in the list, and its icon will show up in the Chrome toolbar.
|
||||
|
||||

|
||||
|
||||
<!--  -->
|
||||
|
||||
> **Tip**: after every code change, go back to `chrome://extensions/` and click the **refresh button (🔄)** on the extension card to update it.
|
||||
|
||||
# Chapter 3: Implement the Core Feature - Read Page + AI Summary
|
||||
|
||||
## 3.1 Content Script: Read Page Text
|
||||
|
||||
Content Script is a script injected into the webpage. It can directly access the page DOM. We use it to extract page text.
|
||||
|
||||
Ask AI to write `content.js`:
|
||||
|
||||
```text
|
||||
Please help me write content.js with the following functions:
|
||||
1. Listen for messages from Service Worker
|
||||
2. When receiving a "getPageContent" message, extract the current page text content
|
||||
3. Extraction logic: get document.body.innerText, and also get the page title and URL
|
||||
4. Return the extracted content via sendResponse
|
||||
```
|
||||
|
||||
AI will generate code like this:
|
||||
|
||||
```javascript
|
||||
// content.js
|
||||
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
|
||||
if (request.action === 'getPageContent') {
|
||||
const content = document.body.innerText || document.body.textContent
|
||||
sendResponse({
|
||||
content: content.trim(),
|
||||
title: document.title,
|
||||
url: window.location.href
|
||||
})
|
||||
}
|
||||
return true // Keep the message channel open
|
||||
})
|
||||
```
|
||||
|
||||
## 3.2 Service Worker: Call AI API
|
||||
|
||||
Service Worker is the extension's "brain." It coordinates communication among components and calls external AI APIs.
|
||||
|
||||
Ask AI to write `background.js`:
|
||||
|
||||
```text
|
||||
Please help me write background.js with the following functions:
|
||||
1. When the user clicks the extension icon, open the side panel
|
||||
2. Listen for "summarize" messages from the side panel
|
||||
3. After receiving the message, send "getPageContent" to the content script in the current tab to get page content
|
||||
4. After receiving the page content, read the user's configured API Key and model selection from chrome.storage.local
|
||||
5. Call the corresponding AI API according to the configuration (support OpenAI and Claude)
|
||||
6. Send the AI summary back to the side panel
|
||||
|
||||
For OpenAI, call https://api.openai.com/v1/chat/completions and use model gpt-4o-mini
|
||||
For Claude, call https://api.anthropic.com/v1/messages and use model claude-sonnet-4-20250514
|
||||
System prompt: Please summarize the following webpage content in Chinese, extract the key points, and keep it within 300 Chinese characters.
|
||||
```
|
||||
|
||||
Core code looks like this:
|
||||
|
||||
```javascript
|
||||
// background.js
|
||||
|
||||
// Open the side panel when the user clicks the icon
|
||||
chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true })
|
||||
|
||||
// Listen for messages from the side panel
|
||||
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
|
||||
if (request.action === 'summarize') {
|
||||
handleSummarize(request.tabId).then(sendResponse).catch((err) => sendResponse({ error: err.message }))
|
||||
return true // Async response
|
||||
}
|
||||
})
|
||||
|
||||
async function handleSummarize(tabId) {
|
||||
// 1. Get page content
|
||||
const response = await chrome.tabs.sendMessage(tabId, {
|
||||
action: 'getPageContent'
|
||||
})
|
||||
|
||||
// 2. Read user settings
|
||||
const { apiKey, provider } = await chrome.storage.local.get([
|
||||
'apiKey', 'provider'
|
||||
])
|
||||
|
||||
if (!apiKey) {
|
||||
return { error: 'Please configure your API Key in the settings page first' }
|
||||
}
|
||||
|
||||
// 3. Call AI API
|
||||
const summary = provider === 'claude'
|
||||
? await callClaude(response.content, apiKey)
|
||||
: await callOpenAI(response.content, apiKey)
|
||||
|
||||
return { summary, title: response.title }
|
||||
}
|
||||
```
|
||||
|
||||

|
||||
<!--  -->
|
||||
|
||||
## 3.3 Side Panel UI: Show Summary Result
|
||||
|
||||
The side panel is the main interaction UI for users. Ask AI to write the HTML, CSS, and JS for the side panel:
|
||||
|
||||
```text
|
||||
Please help me write these three files for the side panel:
|
||||
|
||||
sidepanel.html:
|
||||
- Show the plugin name "AI Page Summarizer" at the top
|
||||
- A blue "Summarize Current Page" button
|
||||
- A loading animation area (hidden by default)
|
||||
- A result display area showing the page title and AI summary
|
||||
- A "Copy Summary" button at the bottom
|
||||
|
||||
sidepanel.css:
|
||||
- Clean modern design, similar to Notion typography
|
||||
- Width adapts to the side panel
|
||||
- Buttons have hover effects
|
||||
- Loading animation implemented with CSS
|
||||
|
||||
sidepanel.js:
|
||||
- When clicking the "Summarize" button, get the current tab ID
|
||||
- Send a summarize message to background.js
|
||||
- Show loading animation
|
||||
- Hide loading and display summary after receiving result
|
||||
- Use navigator.clipboard.writeText in the "Copy" button to copy text
|
||||
```
|
||||
|
||||

|
||||
|
||||
<!--  -->
|
||||
|
||||
## 3.4 Settings Page: Configure API Key
|
||||
|
||||
Users need a place to enter their own API Key. Ask AI to write the settings page:
|
||||
|
||||
```text
|
||||
Please help me write options.html and options.js:
|
||||
- A dropdown to choose AI provider (OpenAI / Claude)
|
||||
- A password input for API Key (type="password")
|
||||
- A "Save" button
|
||||
- Save config with chrome.storage.local.set
|
||||
- Read saved config from storage and fill the form on page load
|
||||
- Show "Settings saved" after saving
|
||||
```
|
||||
|
||||
> **Security reminder**: the API Key is stored in `chrome.storage.local` and only kept on the local device. But if you want to publish this extension to the Chrome Web Store for others to use, a safer approach is to build a backend proxy server so the API Key is not exposed directly on the client side.
|
||||
|
||||

|
||||

|
||||

|
||||
<!--  -->
|
||||
|
||||
# Chapter 4: Use Chrome Built-in AI (No API Key Needed)
|
||||
|
||||
Starting from Chrome 138, Google has built AI capabilities based on **Gemini Nano** directly into the browser. The one best suited for our case is the **Summarizer API** - it runs entirely locally, needs no API Key, works offline once the model has been downloaded, and is free.
|
||||
|
||||
## 4.1 Check Browser Support
|
||||
|
||||
Built-in AI has hardware requirements:
|
||||
|
||||
* Desktop Chrome 138+ (Windows 10+, macOS 13+, Linux, ChromeOS)
|
||||
* 22 GB available storage space (for model download)
|
||||
* 4GB+ GPU VRAM, or 16GB+ system RAM with 4+ CPU cores
|
||||
|
||||
Enter `chrome://flags` in the Chrome address bar, search for the flag related to Summarization, and make sure it is **Enabled**.
|
||||
* In Chrome 131-137, this switch is called Summarization API.
|
||||
* In Chrome 138-144, it was renamed to Summarization API for Gemini Nano.
|
||||
* In Chrome 145+, Summarization API for Gemini Nano was removed, and its summarization function was integrated into Prompt API for Gemini Nano.
|
||||
|
||||

|
||||
<!--  -->
|
||||
|
||||
## 4.2 Use Summarizer API
|
||||
|
||||
Ask AI to add built-in AI support in `background.js`:
|
||||
|
||||
```text
|
||||
Please help me add Chrome built-in Summarizer API support in background.js:
|
||||
1. Add a summarizeWithBuiltinAI function
|
||||
2. First check whether Summarizer.availability() returns 'available'
|
||||
3. If available, create a summarizer instance, configure type as 'key-points', format as 'markdown', and length as 'medium'
|
||||
4. Call summarizer.summarize() to summarize
|
||||
5. In handleSummarize, add a branch for provider === 'builtin'
|
||||
```
|
||||
|
||||
Core code:
|
||||
|
||||
```javascript
|
||||
async function summarizeWithBuiltinAI(text) {
|
||||
// Check availability
|
||||
const availability = await Summarizer.availability()
|
||||
if (availability !== 'available') {
|
||||
throw new Error('Chrome built-in AI is not available. Please check browser version and hardware requirements.')
|
||||
}
|
||||
|
||||
// Create summarizer
|
||||
const summarizer = await Summarizer.create({
|
||||
type: 'key-points',
|
||||
format: 'markdown',
|
||||
length: 'medium'
|
||||
})
|
||||
|
||||
// Run summary
|
||||
const summary = await summarizer.summarize(text, {
|
||||
context: 'This is a webpage article'
|
||||
})
|
||||
|
||||
return summary
|
||||
}
|
||||
```
|
||||
|
||||
## 4.3 Update the Settings Page
|
||||
|
||||
Add a **"Chrome Built-in AI (Free, No API Key Needed)"** option to the provider dropdown in `options.html`. When users choose it, hide the API Key input because it is no longer needed.
|
||||
|
||||
```text
|
||||
Please help me modify options.html and options.js:
|
||||
1. Add an option "Chrome built-in AI (free, no API Key needed)" to the provider dropdown, with value "builtin"
|
||||
2. Hide the API Key input when builtin is selected
|
||||
3. Show the API Key input when OpenAI or Claude is selected
|
||||
```
|
||||
|
||||

|
||||
<!--  -->
|
||||
|
||||
# Chapter 5: Testing and Debugging
|
||||
|
||||
## 5.1 Local Testing Workflow
|
||||
|
||||
Debugging Chrome extensions is a bit different from debugging normal webpages:
|
||||
|
||||
**Debug Service Worker:**
|
||||
1. Open `chrome://extensions/`
|
||||
2. Find your extension and click the **Service Worker** link
|
||||
3. A dedicated DevTools window opens where you can see `console.log` output and network requests
|
||||
|
||||
**Debug Side Panel:**
|
||||
1. Open the side panel
|
||||
2. Right-click inside the side panel content
|
||||
3. Choose **Inspect**
|
||||
4. This opens DevTools for the side panel
|
||||
|
||||
**Debug Content Script:**
|
||||
1. Open DevTools with F12 on any webpage
|
||||
2. In the Console panel, click the execution context dropdown in the top-left
|
||||
3. Select your extension name
|
||||
4. Then you can see `console` output from the Content Script
|
||||
|
||||

|
||||
<!--  -->
|
||||
|
||||
## 5.2 Common Troubleshooting
|
||||
|
||||
| Problem | Possible Cause | Solution |
|
||||
|------|---------|---------|
|
||||
| Clicking the icon does nothing | Service Worker error | Check the Service Worker DevTools Console |
|
||||
| Cannot get page content | Content Script not injected | Refresh the page and try again, check `matches` config in manifest |
|
||||
| API call fails | API Key is wrong or expired | Re-enter the API Key in the settings page |
|
||||
| Side panel is blank | `sidepanel.html` path is wrong | Check `side_panel.default_path` in manifest |
|
||||
|
||||
|
||||
# Chapter 6: Publish to Chrome Web Store (Optional)
|
||||
|
||||
If you want to share the extension with others, you can publish it to the Chrome Web Store.
|
||||
|
||||
## 6.1 Prepare for Publishing
|
||||
|
||||
1. **Register a developer account**: visit [Chrome Web Store Developer Dashboard](https://chrome.google.com/webstore/devconsole) and pay the one-time $5 registration fee
|
||||
2. **Enable 2-Step Verification**: your Google account must enable 2-Step Verification before publishing
|
||||
3. **Prepare assets**:
|
||||
* Extension icon: 128x128 PNG
|
||||
* At least one screenshot: 1280x800 recommended
|
||||
* Detailed functional description
|
||||
* Privacy policy explanation (if your extension processes user data)
|
||||
|
||||
## 6.2 Package and Upload
|
||||
|
||||
1. Compress the extension folder as a `.zip` file (not `.crx`)
|
||||
2. Click **New Item** in Developer Dashboard
|
||||
3. Upload the `.zip` file
|
||||
4. Fill in store information (name, description, screenshots, category, etc.)
|
||||
5. Fill in privacy practices (declare what user data your extension collects)
|
||||
6. Click **Submit for Review**
|
||||
|
||||
Google will review submitted extensions, which usually takes several business days. The fewer permissions you request and the clearer your description is, the faster the review usually goes.
|
||||
|
||||

|
||||

|
||||
|
||||
<!--  -->
|
||||
|
||||
# Chapter 7: Final Notes
|
||||
|
||||
Congratulations! You have built an AI-driven browser extension from scratch. Let us review what we did:
|
||||
|
||||
1. Understood the Manifest V3 architecture of Chrome extensions
|
||||
2. Used Content Script to read webpage content
|
||||
3. Used Service Worker to call AI APIs and generate summaries
|
||||
4. Used Side Panel to display the summary result
|
||||
5. Also learned how to use Chrome built-in AI without any API Key
|
||||
|
||||
Browser extension development is a very interesting field - it lets you "enhance" any webpage on the internet. Besides summarizing pages, you can build many more things with a similar architecture:
|
||||
|
||||
**Advanced directions:**
|
||||
|
||||
* **Translation assistant**: translate foreign webpages into Chinese in one click
|
||||
* **Reading annotations**: highlight and annotate pages, then save to the cloud
|
||||
* **Price tracking**: monitor price changes on e-commerce pages and notify users
|
||||
* **Code explainer**: select code on GitHub and let AI explain it automatically
|
||||
|
||||
The arrival of Chrome built-in AI lowers the barrier even further - you do not even need an API Key to build AI-powered extensions. As browser AI capabilities continue to grow, the room for imagination in this field will only get bigger.
|
||||
|
||||
***Go give your browser some superpowers!***
|
||||
|
||||
# References
|
||||
|
||||
* [Chrome Extension Official Docs - Manifest V3](https://developer.chrome.com/docs/extensions/develop/)
|
||||
* [Publish Chrome Extension to Chrome Web Store](https://developer.chrome.com/docs/webstore/publish?hl=zh-cn)
|
||||
* [Chrome Side Panel API](https://developer.chrome.com/docs/extensions/reference/api/sidePanel)
|
||||
* [Chrome Built-in AI - Summarizer API](https://developer.chrome.com/docs/ai/summarizer-api)
|
||||
* [Chrome Built-in AI - Prompt API](https://developer.chrome.com/docs/ai/prompt-api)
|
||||
* [OpenAI API Docs](https://platform.openai.com/docs/api-reference)
|
||||
* [Anthropic Claude API Docs](https://docs.anthropic.com/en/docs/)
|
||||
* [Chrome Web Store Publishing Guide](https://developer.chrome.com/docs/webstore/publish?hl=zh-cn)
|
||||
+97
-37
@@ -1,93 +1,153 @@
|
||||
# Advanced Development
|
||||
|
||||
Welcome to the **Advanced Development** stage! Here, you will build complex cross-platform applications, master WeChat Mini Program development, and challenge yourself with more advanced AI-native application development.
|
||||
Welcome to the **Advanced Development** stage! Here, you will build complex cross-platform applications, master WeChat Mini Program development in practice, and explore deeper AI-native application development.
|
||||
|
||||
## What You Will Learn
|
||||
|
||||
### Core Skills
|
||||
|
||||
Deeply master the MCP protocol and Claude Code advanced techniques to improve development efficiency:
|
||||
Master the MCP protocol and advanced Claude Code techniques in depth to improve development efficiency:
|
||||
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced 1: MCP & Claude Code Skills"
|
||||
description="Master Model Context Protocol (MCP) to extend the capabilities of AI programming tools"
|
||||
href="/en/stage-3/core-skills/basics/"
|
||||
title="Claude Code Quickstart Core Guide"
|
||||
description="Quickly master Claude Code's core usage, including installation, configuration, basic operations, and practical tips"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced 2: Long-Running Tasks"
|
||||
description="Learn how to make AI coding tools handle long-running complex tasks"
|
||||
href="/en/stage-3/core-skills/mcp/"
|
||||
title="MCP and Claude Code Complete Guide"
|
||||
description="Master the Model Context Protocol (MCP) and expand the capability boundaries of AI coding tools"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/core-skills/skills/"
|
||||
title="Claude Code Skills Complete Guide"
|
||||
description="Package professional knowledge, workflows, and best practices into reusable skill bundles"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/core-skills/long-running-tasks/"
|
||||
title="How to Make Coding Tools Work for a Long Time"
|
||||
description="Learn how to let AI coding tools handle long-running, complex tasks"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/core-skills/agent-teams/"
|
||||
title="Claude Agent Teams Complete Guide"
|
||||
description="Let multiple AI instances collaborate like a real development team"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/core-skills/superpowers/"
|
||||
title="Claude Code Superpowers for Engineering-Grade Development"
|
||||
description="Use the Superpowers framework to help AI write engineering-grade code"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/core-skills/workflow/"
|
||||
title="Claude Code Workflow Best Practices"
|
||||
description="Master Claude Code best practices in different scenarios"
|
||||
/>
|
||||
</NavGrid>
|
||||
|
||||
|
||||
### Cross-Platform Development
|
||||
|
||||
Build WeChat Mini Programs, Android, and iOS applications to achieve cross-platform coverage:
|
||||
Build WeChat Mini Programs, Android and iOS applications, and achieve cross-platform coverage:
|
||||
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced 3: Building WeChat Mini Programs"
|
||||
description="Develop WeChat Mini Programs from scratch, mastering core mini program development workflows"
|
||||
href="/en/stage-3/cross-platform/3.1-choose-platform/"
|
||||
title="How to Choose the Right Platform for Your App"
|
||||
description="Find the most suitable development platform based on user scenarios and needs"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced 4: WeChat Mini Programs with Backend"
|
||||
description="Build complete WeChat Mini Program applications with backend support"
|
||||
href="/en/stage-3/cross-platform/3.3-wechat-miniprogram/"
|
||||
title="How to Build a WeChat Mini Program"
|
||||
description="Develop a WeChat Mini Program from scratch and master the core development workflow"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced 5: Building Android Apps"
|
||||
href="/en/stage-3/cross-platform/3.4-wechat-miniprogram-backend/"
|
||||
title="How to Build a WeChat Mini Program (with Backend)"
|
||||
description="Build a complete WeChat Mini Program application with backend support"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.5-android-app/"
|
||||
title="How to Build an Android App"
|
||||
description="Use modern cross-platform frameworks to build Android native applications"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced 6: Building iOS Apps"
|
||||
description="Develop and publish iOS applications, mastering iOS ecosystem development standards"
|
||||
href="/en/stage-3/cross-platform/3.6-ios-app/"
|
||||
title="How to Build an iOS App"
|
||||
description="Develop and publish iOS applications while mastering iOS ecosystem development standards"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.8-pwa-local-app/"
|
||||
title="How to Build a PWA Local App"
|
||||
description="Turn a web page into a real app with offline use and desktop installation support"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.9-browser-ai-extension/"
|
||||
title="How to Build a Browser AI Assistant Extension"
|
||||
description="Summarize any web page with one click and build your browser AI assistant"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.10-electron-voice-to-text/"
|
||||
title="How to Build a Cross-Platform Electron Desktop App"
|
||||
description="Build a speech-to-text desktop application for Windows, macOS, and Linux"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.11-nft-minting/"
|
||||
title="How to Quickly Build and Mint an NFT"
|
||||
description="A 10-minute starter version to write an NFT smart contract and mint from scratch"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.12-vscode-extension/"
|
||||
title="How to Build a VS Code Extension"
|
||||
description="Build your AI project assistant with multi-file Q&A and custom shortcuts"
|
||||
/>
|
||||
<NavCard
|
||||
href="/en/stage-3/cross-platform/3.13-qt-industrial-hmi/"
|
||||
title="How to Build an Industrial-Grade Qt Desktop App"
|
||||
description="Build a water-pump monitoring HMI system and master industrial desktop application development"
|
||||
/>
|
||||
</NavGrid>
|
||||
|
||||
|
||||
### Personal Brand
|
||||
|
||||
Build your own personal website and tech blog to establish personal influence:
|
||||
Build your own personal website and technical blog to establish personal influence:
|
||||
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced 7: Building Your Personal Website & Academic Blog"
|
||||
description="Use modern technology stacks to build high-performance, visually appealing personal blogs"
|
||||
href="/en/stage-3/personal-brand/3.7-personal-website-blog/"
|
||||
title="How to Build Your Own Personal Website and Academic Blog"
|
||||
description="Use a modern tech stack to build a high-performance, visually polished personal blog"
|
||||
/>
|
||||
</NavGrid>
|
||||
|
||||
### AI Capabilities Appendix
|
||||
|
||||
### Advanced AI Capabilities
|
||||
Explore advanced AI technologies such as RAG and LangGraph to build complex AI application workflows:
|
||||
|
||||
Explore advanced AI technologies like RAG and LangGraph to build complex AI application workflows:
|
||||
<NavGrid>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced AI 1: What is RAG and How It Works"
|
||||
href="/en/stage-3/ai-advanced/3.a1-rag-introduction/"
|
||||
title="What Is RAG and How It Works"
|
||||
description="Deeply understand the principles of Retrieval-Augmented Generation (RAG) and its value in AI applications"
|
||||
/>
|
||||
<NavCard
|
||||
href="#"
|
||||
title="Advanced AI 2: Advanced RAG & Workflow Orchestration - LangGraph"
|
||||
href="/en/stage-3/ai-advanced/3.a2-langgraph-advanced-rag/"
|
||||
title="Intermediate and Advanced RAG with Workflow Orchestration - Using LangGraph as an Example"
|
||||
description="Learn to use LangGraph to orchestrate complex AI workflows and build advanced RAG systems"
|
||||
/>
|
||||
</NavGrid>
|
||||
|
||||
|
||||
## Who Is This For
|
||||
|
||||
- Advanced developers with full-stack development experience who want to take on more complex applications
|
||||
- Engineers who want to master cross-platform development technologies
|
||||
- Explorers who want to deeply understand AI-native application development
|
||||
- Tech bloggers who want to build their personal technical brand
|
||||
- Technical bloggers who want to establish their personal technical brand
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Complete the "Full-Stack Development" stage, or have full-stack development experience
|
||||
- Familiar with frontend frameworks (such as React/Vue) and backend development
|
||||
- Complete the "Junior-to-Intermediate Development" stage, or have full-stack development experience
|
||||
- Be familiar with frontend frameworks (such as React/Vue) and backend development
|
||||
- Understand basic AI concepts and API usage
|
||||
|
||||
Ready to challenge advanced development? Click the left navigation to begin learning!
|
||||
Ready to challenge advanced development? Click the left navigation to start learning!
|
||||
|
||||
@@ -0,0 +1,985 @@
|
||||
# How to Build Your Own Personal Website and Academic Blog - Static Deployment with GitHub Pages
|
||||
|
||||
# 1. What Is a Personal Website and Academic Blog?
|
||||
|
||||
In this tutorial, we will run through a complete closed loop: **from finding an existing website template, to modifying it into a personal homepage for Elon Musk, and finally publishing it online for free**.
|
||||
|
||||
For this tutorial, you should at least have:
|
||||
|
||||
* **A computer** (Windows or Mac)
|
||||
* **Your GitHub account** (used to store website code and provide free hosting)
|
||||
* **Trae installed** (your AI coding partner)
|
||||
* **A Git environment**
|
||||
* **A Ruby environment**
|
||||
|
||||
## 1.1 What is an academic personal homepage?
|
||||
|
||||
An **academic personal homepage** is your own private territory on the internet.
|
||||
|
||||
Unlike WeChat Moments, Zhihu, or LinkedIn, it does not depend on any platform's recommendation algorithm, and it will not disappear if a platform shuts down. It is a long-term, stable **personal showcase space** that can be indexed by Google and Google Scholar. It usually contains your bio, publications, projects, and technical blog.
|
||||
|
||||

|
||||
|
||||
## 1.2 Why build your own website?
|
||||
|
||||
In the Vibe Coding development model, we no longer need to work through thick HTML/CSS books like people did ten years ago. With AI, your role in building a website shifts from "struggling coder" to "website editor-in-chief":
|
||||
|
||||
1. **You (Editor / PM)**: decide the site's tone and content. For example: "Put Musk's Mars colonization PPT here," or "Change this button to Tesla red."
|
||||
2. **Trae (AI Engineer)**: handles the hard implementation work. It turns your natural-language instructions into code, including layout, color schemes, and mobile adaptation.
|
||||
3. **GitHub Pages (Showroom)**: provides a free server and domain so people around the world can see your work.
|
||||
|
||||
**Why is it worth having for academics or technical people?**
|
||||
|
||||
* **Externally (building influence)**: it is an **"evergreen business card."** When applying for PhD programs, jobs, or collaborations, a tidy personal homepage is often much more persuasive than a PDF resume.
|
||||
* **Internally (knowledge accumulation)**: it is your **"second brain."** You can use it to record course notes, technical thinking, and build your own knowledge system.
|
||||
* **For the future (being discoverable)**: search engines like structured content. With a homepage, when people search your name, **the content you define** can appear first, instead of unrelated people with the same name.
|
||||
|
||||
## 1.3 Four typical ways to build a personal website
|
||||
|
||||
In practice, there are countless ways to build a website. Here we introduce only the four most mainstream ones:
|
||||
|
||||
**Method 1: hand-writing from scratch with HTML / CSS / JS**
|
||||
This is the traditional computer science route. You write the code character by character. The advantage is extreme flexibility. The disadvantage is a very high barrier to entry, and it is easy to get stuck while tweaking CSS. It is not ideal for those of us who want to focus on content.
|
||||
|
||||

|
||||
|
||||
**Method 2: visual site builders such as Wix / WordPress**
|
||||
This is like building with blocks. The advantage is easy drag-and-drop editing. The disadvantage is that it often requires payment, tends to generate bloated code, lacks an academic-geek feel, and is difficult to customize deeply.
|
||||
|
||||

|
||||
|
||||
**Method 3: GitHub-based templates (Static Site Generators)**
|
||||
This is the **most recommended** mainstream route in academic and geek communities. We directly fork a mature template written by others, such as one based on Jekyll or Hugo, and then only modify the configuration files and content.
|
||||
|
||||

|
||||
|
||||
**Method 4: Vibe Coding (AI visual generation flow)**
|
||||
With AI agents that have strong multimodal visual understanding, you only need to see a website style you like online, take a screenshot, and tell the AI: "Write me a webpage based on this style." The AI can then analyze the visual elements and generate the underlying code for you.
|
||||
|
||||

|
||||
|
||||
**The choice in this tutorial: GitHub Pages + academic template + AI modifications.**
|
||||
The reason is simple:
|
||||
|
||||
* **Zero cost**: no need to buy a server, no need to buy a domain.
|
||||
* **High quality**: templates are often designed by top developers, with minimal style, professional structure, and fast loading.
|
||||
* **Easy to maintain**: you mainly write Markdown, similar to writing in Feishu Docs or Notion, and AI helps generate the webpage.
|
||||
|
||||
## 1.4 The full roadmap of this tutorial
|
||||
|
||||
To make the configuration process more intuitive and less boring, we will use a fun case: **building an academic homepage for Musk**.
|
||||
|
||||
Although Elon Musk is not a university professor, he has published many public "technical white papers," such as *Hyperloop Alpha*, and also has many famous projects, such as SpaceX and Tesla. We will use those materials as test data and, together with Trae's Vibe Coding workflow, walk through a reusable site-building route:
|
||||
|
||||
1. **Find the skeleton**: locate a high-quality website template on GitHub and fork it into your own repository.
|
||||
2. **Prepare the environment**: pull the code locally and configure Trae so the AI can read your project.
|
||||
3. **Iterate with AI**: replace the template's placeholder person with Elon Musk, upload his resume, change the "publication list" into a "technical white paper showcase," and even ask AI to recolor the site to "Mars red."
|
||||
4. **Deploy online**: push the modified code back to GitHub and instantly get an accessible website URL.
|
||||
|
||||
This section is only responsible for drawing the big picture. For now, just remember the main line:
|
||||
**Fork template -> AI renovation -> push online**
|
||||
In the following sections, we will walk through every step together.
|
||||
|
||||
# 2. Environment Preparation
|
||||
|
||||
## 2.1 Tools used in this tutorial
|
||||
|
||||
The whole build process uses five tools or resources, each playing a role such as designer, contractor, landowner, or logistics system.
|
||||
|
||||
* **A computer**: Windows or Mac is fine. Unlike Android development, which often has high memory requirements, web development is very lightweight and runs smoothly on an ordinary office laptop.
|
||||
* **Trae**: this is your **AI coding partner** and core productivity tool. In Vibe Coding mode, you do not need to master HTML or CSS syntax. You mainly tell AI in natural language, such as "Change the navigation bar to black" or "Put Musk's photo here," and let it write and modify the code for you.
|
||||
* **A GitHub account**: this is your **free server and code vault**. We need it to store all website files. Most importantly, we will use **GitHub Pages** to turn the code into a globally accessible URL for free, eliminating the need to buy a server or domain.
|
||||
* **Git environment**: this is the backstage **courier**. Although we write code locally in Trae, Git is what pushes the code from your computer to GitHub. You do not need to master Git commands, and Trae can help invoke them, but Git must be installed first.
|
||||
* **Ruby environment**: this is the local **web page workshop**. Because the academic template in this tutorial uses Jekyll, which runs on Ruby, we need Ruby locally so we can preview the website on our own computer before pushing it online.
|
||||
|
||||
## 2.2 Download Trae
|
||||
|
||||
**Trae** is our main battlefield for Vibe Coding. You can think of it as a **code editor with a super AI built in**. Unlike traditional cold editors, it is like an experienced programmer sitting next to you, always ready to help.
|
||||
|
||||
* **Download address**: visit the official site [https://www.trae.cn](https://www.trae.cn) and download the version for your operating system, Windows or Mac.
|
||||
* **Installation**: installation is very simple, just like installing WeChat or QQ. Double-click the installer package and click "Next" until it finishes.
|
||||
|
||||
After preparing this tool, in the following practical steps we will not need to stare at boring code panes. We will directly open the project here and use the chat panel on the right to tell the AI in natural language, in Chinese if you like, to help us write code, fix bugs, and even refactor whole pages.
|
||||
|
||||

|
||||
|
||||
## 2.3 Download Git
|
||||
|
||||
**What is Git?**
|
||||
If Trae is the AI engineer responsible for writing code in Vibe Coding, then **Git is the courier responsible for transporting code**. You need it to package the code written on your computer and safely push it to GitHub, your cloud repository. Without it, your site runs only on your own machine and no one else can see it.
|
||||
|
||||
In the past, you had to go to the official site, download the installer, and configure environment variables manually. That was annoying. Now, we can simply let Trae help detect and install it.
|
||||
|
||||
**Step 1: Check whether Git is already installed**
|
||||
|
||||
Open Trae and type the following instruction in the chat panel at the lower right:
|
||||
|
||||
```markdown
|
||||
Please help me check whether Git is already installed on this computer. Please run the `git --version` command in the terminal.
|
||||
```
|
||||
|
||||
* **Case A (already installed)**: if you see something like `git version 2.xx.x`, congratulations. You can skip the installation step directly.
|
||||
* **Case B (not installed)**: if you see "command not found" or a group of red error messages, continue below.
|
||||
|
||||

|
||||
|
||||
**Step 2: AI-assisted installation**
|
||||
|
||||
Do not close Trae. Continue typing in the chat panel:
|
||||
|
||||
**Instruction (Windows users):**
|
||||
|
||||
```markdown
|
||||
I have not installed Git. Please write the command that uses the `winget` command-line tool to install Git automatically, and tell me how to run it in the terminal.
|
||||
```
|
||||
|
||||
**Instruction (Mac users):**
|
||||
|
||||
```markdown
|
||||
I have not installed Git. Please tell me how to quickly install Git through terminal commands, for example using `xcode-select --install` or `brew install git`.
|
||||
```
|
||||
|
||||
Trae will give you a command, often something like `winget install --id Git.Git`.
|
||||
|
||||
You only need to click the **Run in Terminal** button in the code block or copy it into the terminal at the bottom and press Enter. It will automatically download and install Git for you.
|
||||
|
||||
If you still feel the AI-assisted process is not perfect enough, you can refer to this tutorial for manual download and installation:
|
||||
[Git download and installation tutorial](https://blog.csdn.net/weixin_41293671/article/details/144255269)
|
||||
|
||||
## 2.4 Install the Ruby environment
|
||||
|
||||
Before we officially start writing code, we still need one last piece of the puzzle. The academic homepage template used in this tutorial is built with Jekyll, which itself is based on the Ruby programming language.
|
||||
|
||||
To preview and debug the "renovation effect" on your own computer before pushing the code to GitHub for the world to see, we must install a Ruby environment on the computer. Think of this as hiring an interpreter on your computer who understands Ruby. Do not worry, you do not need to learn how to write Ruby. You only need to install it, and Trae can handle the rest.
|
||||
|
||||
### 2.4.1 Windows installation
|
||||
|
||||
**Step 1: Download the installer using a domestic mirror**
|
||||
|
||||
For Windows users, the official site at https://rubyinstaller.org/downloads/ provides one-click installers, but because of network differences, it helps to know a trick. The official recommendation for beginners is usually **`Ruby+Devkit 3.X.X (x64)`**, because it includes the required toolchain.
|
||||
|
||||
**Beginner reminder**: in practice, downloading directly from the official site may be slow or fail. We strongly recommend using the domestic mirror at [RubyInstaller for Windows - China mirror](https://rubyinstaller.cn/), which is usually much faster.
|
||||
|
||||

|
||||
|
||||
**Step 2: Run the installation**
|
||||
|
||||
Double-click the downloaded installer. In the setup wizard, make sure to check **"Add Ruby executables to your PATH."** This is the most important step. Otherwise the computer will not be able to "find" the interpreter you just installed.
|
||||
|
||||
After checking it, keep clicking **Next** to complete the installation.
|
||||
|
||||

|
||||
|
||||
**Step 3: Configure the development toolkit**
|
||||
|
||||
When the installation progress finishes, a black command-line window will open automatically. Do not panic. Type the number `3` where the cursor is blinking, which means installing the MSYS2 base environment and the MINGW toolchain, then press Enter. Wait until the commands finish running and the window closes automatically.
|
||||
|
||||

|
||||
|
||||
**Step 4: Verify the result**
|
||||
|
||||
Now it is time to ask AI to check your homework. Open Trae and type the following natural-language instruction in the right-side chat:
|
||||
|
||||
```markdown
|
||||
Please help me check whether the Ruby environment has been installed correctly on this computer. Please run the `ruby -v` command in the terminal at the bottom and tell me the result.
|
||||
```
|
||||
|
||||
If Trae replies with something like `ruby 3.x.x`, then your Windows Ruby environment is fully set up.
|
||||
|
||||

|
||||
|
||||
### 2.4.2 Mac installation
|
||||
|
||||
Configuring a Mac environment feels more "geeky" because it usually requires terminal commands. But in Vibe Coding mode, we do not even need to open the terminal manually. We can just let Trae act as our personal IT operator.
|
||||
|
||||
**Step 1: Give the one-shot environment setup instruction**
|
||||
|
||||
Open Trae and paste the following natural-language instruction into the chat on the right. We will ask it to handle checking Homebrew, installing it if missing, then installing Ruby:
|
||||
|
||||
```markdown
|
||||
I am using a Mac computer and need to configure a Ruby development environment. Please help me complete the following steps:
|
||||
1. Check whether Homebrew is already installed. If not, please run Homebrew's official installation script in the terminal.
|
||||
2. After confirming Homebrew is ready, run `brew install ruby` in the terminal.
|
||||
3. When everything is done, run `ruby -v` to confirm the installation succeeded.
|
||||
Please guide me step by step, and when necessary provide terminal commands that I can click and run directly.
|
||||
```
|
||||
|
||||
After receiving the instruction, Trae will start working and show code blocks with run buttons in the chat panel.
|
||||
|
||||
**Important note for beginners**
|
||||
|
||||
When installing Homebrew, the terminal often prompts something like `Password:` and asks for your Mac login password.
|
||||
|
||||
**Note:** when you type a password in the Mac terminal, the screen will not show any characters or stars. This is normal. Just type your password blindly and press Enter.
|
||||
|
||||
**Step 2: Verify the result**
|
||||
|
||||
After installation, go back to Trae and type:
|
||||
|
||||
```markdown
|
||||
I just installed Ruby on this Mac through `brew`. Please help me run the `ruby -v` command in the terminal and check whether the installation and environment variables are correct.
|
||||
```
|
||||
|
||||
When you see something like `ruby 3.x.x` in the terminal, the local webpage workshop is ready and your Mac is prepared for Vibe Coding.
|
||||
|
||||
## 2.5 Register a GitHub account
|
||||
|
||||
**What is GitHub?**
|
||||
If Git is the courier, then **GitHub is the cloud warehouse and showroom**. It not only hosts your code for free, but more importantly, with **GitHub Pages** it can turn your code into a globally accessible website URL. It is also the world's largest code hosting platform, and having a GitHub account is a kind of passport into the technical world.
|
||||
|
||||
**Registration steps:**
|
||||
|
||||
1. **Visit the official site**: open [https://github.com/](https://github.com/).
|
||||
2. **Click Sign up**: click **"Sign up"** in the upper right corner.
|
||||
|
||||

|
||||
|
||||
3. **Fill in your information**
|
||||
4. **Email**: enter a real email address.
|
||||
5. **Password**: choose a strong password.
|
||||
6. **Username (important!)**: **choose carefully**. Your homepage URL will later become **`https://your-username.github.io`**. It is best to use your English name, pinyin, a familiar ID, or a simple combination of letters and numbers. Do **not** choose something like `a1b2c3d4`, otherwise your website link will be hard to remember.
|
||||
7. **Verification and activation**: complete the human verification, often rotating images or choosing spiral galaxies, then check your email for the verification code.
|
||||
|
||||

|
||||
|
||||
Once registration is complete, you have a plot of your own on the internet. In the next section, we will begin building on that plot.
|
||||
|
||||

|
||||
|
||||
# 3. From Template to Your First Accessible Page
|
||||
|
||||
Everything is ready. In the first two chapters, we prepared the tools. In this chapter, we will officially claim land on the internet. The task in this chapter is simple:
|
||||
**Do not worry about decoration or content yet. First build the site's skeleton and get a live access link.**
|
||||
|
||||
We will directly fork a mature academic template and use GitHub Pages automation to get it running within twenty minutes. When finished, you will have a globally accessible link.
|
||||
|
||||
## 3.1 Get a website template
|
||||
|
||||
In Vibe Coding mode, we do not need to write HTML from scratch. GitHub has thousands of excellent open-source templates. We only need to "borrow" one and change the name to our own.
|
||||
|
||||
**Step 1: Find a template**
|
||||
|
||||
Here we have selected a classic template with a clear structure and strong suitability for academic display:
|
||||
https://github.com/luost26/academic-homepage?tab=readme-ov-file
|
||||
This template is based on the Jekyll framework.
|
||||
|
||||
Of course, you can also search **`academic-homepage`** on GitHub and pick another style you like, but to follow this tutorial, it is recommended to use the template above first.
|
||||
|
||||
We also prepared several additional template recommendations for you:
|
||||
|
||||
* Minimal Light personal homepage theme: https://github.com/yaoyao-liu/minimal-light
|
||||
* Minimal Mistakes: [https://github.com/mmistakes/minimal-mistakes](https://github.com/mmistakes/minimal-mistakes)
|
||||
* Pixyll: https://github.com/johno/pixyll
|
||||
* Hydejack: https://github.com/hydecorp/hydejack
|
||||
* Forty Jekyll Theme: https://github.com/andrewbanchich/forty-jekyll-theme
|
||||
* Leonids: https://github.com/renyuanz/leonids
|
||||
* YAT: https://github.com/jeffreytse/jekyll-theme-yat
|
||||
|
||||
**Step 2: Fork the project**
|
||||
|
||||
Visit the target repository homepage and click the **Fork** button in the upper right corner. A confirmation box will pop up. Click **Create Fork** directly.
|
||||
|
||||
* Explanation: this step is equivalent to copying someone else's code repository with a full set of keys into your own GitHub account. Now, you own your copy of the site.
|
||||
|
||||

|
||||
|
||||
**Step 3: Rename the repository, the most important step**
|
||||
|
||||
Change the repository name to:
|
||||
`your-username.github.io`
|
||||
|
||||
**Important note for beginners**:
|
||||
This is a hard rule of GitHub Pages.
|
||||
For example, if your GitHub username is `musk-fan`, then the repository name **must** be `musk-fan.github.io`.
|
||||
Only this way will GitHub automatically assign you a free domain. If the name is wrong, the webpage will not open later.
|
||||
|
||||

|
||||
|
||||
## 3.2 Get the GitHub project URL
|
||||
|
||||
After renaming, we need the repository's "pickup slip": the clone URL that tells Git where to fetch the code from.
|
||||
|
||||
1. Return to the repository homepage, under the **Code** tab.
|
||||
2. Click the green **Code** button.
|
||||
3. Make sure the **HTTPS** tab is selected.
|
||||
4. Click the copy button and copy the URL ending in `.git`, for example `https://github.com/musk-fan/musk-fan.github.io.git`.
|
||||
|
||||

|
||||
|
||||
## 3.3 Pull the project locally
|
||||
|
||||
In the past, programmers had to type complex Git commands in a black terminal to download code. In the Vibe Coding era, we have Trae. We only need to tell AI, "I want this, help me pull it down."
|
||||
|
||||
**Step 1: Preparation**
|
||||
|
||||
Create a new folder on your computer, for example `MyWebsite`, then right-click and choose **Open with Trae**, or open Trae first and choose **Open Folder**.
|
||||
|
||||

|
||||
|
||||
**Step 2: Give the clone command**
|
||||
|
||||
After Trae opens, bring up the AI chat panel on the right and enter the following natural-language instruction:
|
||||
|
||||
```text
|
||||
Please help me clone the remote GitHub repository into the current folder.
|
||||
Repository address: paste the URL you just copied, for example https://github.com/musk-fan/musk-fan.github.io.git
|
||||
Execution requirement: please run the `git clone` command directly in the terminal.
|
||||
```
|
||||
|
||||
**Step 3: Confirm the download**
|
||||
|
||||
Trae will automatically invoke the terminal at the bottom and execute the command. Wait a few seconds. When you see files such as `_config.yml` and `index.html` appear in the file tree on the left, the project has been successfully moved to your computer.
|
||||
|
||||

|
||||
|
||||
## 3.4 Preview the webpage locally
|
||||
|
||||
The code is on your machine and the Ruby environment is ready. Before we modify the site, we must first inspect it locally on our own computer. This is like renovating a house: you first arrange everything in the showroom, confirm it looks right, and only then open it publicly.
|
||||
|
||||
Thanks to the Ruby environment installed in **Section 2.4**, this is now very simple.
|
||||
|
||||
**Step 1: Install dependencies**
|
||||
|
||||
A Jekyll site depends on many Gems to run. This is like buying all the furniture from a shopping list. **However**, because of network conditions, direct downloads can stall. We will ask Trae to **switch to a domestic mirror** and install dependencies there.
|
||||
|
||||
In Trae's chat box, enter:
|
||||
|
||||
```markdown
|
||||
I need to install the Jekyll dependencies. Considering the network environment, please first change the `source` in the Gemfile to the domestic mirror `https://gems.ruby-china.com/`. After that, please run the `bundle install` command in the terminal to install all dependencies.
|
||||
```
|
||||
|
||||
**Step 2: Start the local service**
|
||||
|
||||
Now we will start a **local server** to simulate the website running. Continue and tell Trae:
|
||||
|
||||
```markdown
|
||||
The dependencies have finished installing. Please help me start the Jekyll local preview service in the terminal. Please run the `bundle exec jekyll serve` command.
|
||||
```
|
||||
|
||||
After the terminal runs for a few seconds, you will see something similar to:
|
||||
`Server address: http://127.0.0.1:4000/academic-homepage/`
|
||||
|
||||
1. **Open the browser**: click that link, or type it directly into your browser:
|
||||
`http://127.0.0.1:4000/academic-homepage/`
|
||||
2. **See the magic**: now your site is already running in the browser. Although it still shows the original template author's name, it is already running locally on your computer.
|
||||
|
||||
From this point on, whenever you change content and press `Ctrl+S` (`Cmd+S` on Mac), then refresh the browser, **the webpage content will change with it**.
|
||||
|
||||

|
||||
|
||||
Once local preview works, we can enter the next chapter and start turning the website into something shaped like Elon Musk.
|
||||
|
||||
# 4. AI-Assisted Content Modification
|
||||
|
||||
To help everyone quickly experience the full process, we will not use our own personal information, to avoid privacy anxiety. Instead, we will use **Elon Musk as an example** and build an academic homepage for him. This lets us drop the boring pressure of writing a personal resume and focus on the fun of Vibe Coding for websites. It also lets us see how cool it is to place the "technical white papers" of a Silicon Valley iron man, such as *Hyperloop Alpha*, on an academic-style website.
|
||||
|
||||
We will go through the complete loop from **getting the template** to **publishing the site**, and build a world-class personal showcase space by hand.
|
||||
|
||||
Follow my pace and send the first instruction to AI.
|
||||
|
||||
## 4.1 Unified global constraints
|
||||
|
||||
This is the **global setup prompt**. You only need to send it once.
|
||||
Its purpose is to set rules for the AI, to prevent it from improvising and breaking the site structure. Copy it directly into Trae:
|
||||
|
||||
```text
|
||||
You are now the maintainer of a “GitHub Pages + Jekyll academic homepage template” site.
|
||||
The current repository is a Jekyll-powered academic homepage (including `_config.yml`, `_data`, `_layouts`, etc.).
|
||||
Your modifications must follow these principles:
|
||||
1. Each step should only solve the current stage goal. Do not do later-stage content in advance.
|
||||
2. Do not modify the site structure, do not introduce new plugins, and do not change the theme style.
|
||||
3. All content must be renderable by Jekyll without errors.
|
||||
4. All identity information must follow an “academic-style simulation” tone and must not use first-person voice.
|
||||
5. Do not invent obviously fake IEEE / Nature papers.
|
||||
6. If information is uncertain, use “publicly well-known facts” or “reasonable academic simulation labeling.”
|
||||
```
|
||||
|
||||
## 4.2 Build Musk's homepage, the content part
|
||||
|
||||
### 4.2.1 First global instruction: replace the identity
|
||||
|
||||
The first thing we need to solve is "Who am I?" The template is filled with the original author's information, and we need to replace it with AI in one go.
|
||||
|
||||
**Step 1: Prepare the assets**
|
||||
|
||||
Put the image assets I provide to you, `University_of_Pennsylvania.jpg` and `Queen_University.jpg`, into the corresponding project folder, usually `/assets/images/badges/`.
|
||||
|
||||

|
||||

|
||||
|
||||
**Step 2: Send the instruction**
|
||||
|
||||
In Trae's right-side chat box, enter the following prompt. Note that we do not need to find and edit lines manually. We just tell AI what we want:
|
||||
|
||||
```text
|
||||
1. Goal: replace the “person identity” of the current academic homepage with Elon Musk. Only modify the basic profile information.
|
||||
2. Specific requirements:
|
||||
1. Name: Elon Musk
|
||||
2. Professional identity:
|
||||
Technology Entrepreneur
|
||||
Engineer
|
||||
Founder & CEO of SpaceX
|
||||
CEO of Tesla, Inc.
|
||||
3. Education:
|
||||
Queen’s University (Physics and Economics, not completed) (image path: /assets/images/badges/Queen_University.jpg)
|
||||
University of Pennsylvania (B.S. in Physics, B.A. in Economics) (image path: /assets/images/badges/University_of_Pennsylvania.jpg)
|
||||
4. Research Interests (can be simulated as):
|
||||
Space Systems Engineering
|
||||
Sustainable Energy Systems
|
||||
Artificial Intelligence & Robotics
|
||||
Large-scale Technological Innovation
|
||||
5. Honors & Recognition:
|
||||
Time Person of the Year (2021)
|
||||
Fellow of the Royal Society (FRS)
|
||||
Listed in Forbes Billionaires (multiple years)
|
||||
6. Constraints:
|
||||
Do not add papers / publications
|
||||
Do not invent IEEE, Nature, or Science papers
|
||||
Use academic-style wording and avoid commercial promotional tone
|
||||
Keep the original field structure unchanged and only replace the content
|
||||
```
|
||||
|
||||
At this point, you can see that Trae has completed all our modification requirements.
|
||||
|
||||

|
||||
|
||||
**Step 3: Refresh the local browser**
|
||||
|
||||
Refresh the local browser now, and you should see everything replaced correctly.
|
||||
|
||||

|
||||
|
||||
### 4.2.2 Iterative improvement: add "papers" and projects
|
||||
|
||||
Because Elon Musk is not a traditional university professor, he rarely publishes papers in *Nature* or *Science*. But as a "chief engineer," he has released many highly technical **white papers** and **master plans**.
|
||||
|
||||
Within the context of an academic homepage, we can redefine the meaning of "Publications" as **"Technical White Papers & Visionary Plans."** This is not awkward at all. In fact, it fits his builder identity very well.
|
||||
|
||||

|
||||
|
||||
**Step 1: Prepare the assets**
|
||||
|
||||
Download the cover images I provide, namely `Hyperloop_Alpha_sketch.jpg`, `SpaceX_Starship.jpg`, and `Neuralink_sewing_machine_robot.jpg`, place them under `/assets/images/covers/`, and remove the example images originally in that folder.
|
||||
|
||||

|
||||

|
||||

|
||||
|
||||
**Step 2: Send the instruction**
|
||||
|
||||
Send the following prompt to Trae and let it help us rebuild the data structure:
|
||||
|
||||
```text
|
||||
1. Role setting: you are a static site development expert who is proficient in Jekyll and Liquid syntax.
|
||||
2. Task goal:
|
||||
Modify the section title on the homepage or in the navigation bar.
|
||||
The current file structure is organized by year subfolders, for example `_publications/2023/xxx.md`.
|
||||
Create three new Markdown files in the specified format to display Elon Musk's technical white papers and visionary plans.
|
||||
3. Specific steps and requirements:
|
||||
1. Modify the section title
|
||||
Please search globally for the string "Selected Publications" (it may appear in `index.html`, `_config.yml`, or `_pages/publications.md`).
|
||||
Replace it with: "Technical White Papers & Visionary Plans".
|
||||
2. Rebuild the publication data (critical step)
|
||||
Clear all old content under the `_publications` folder, including old year folders such as 2023 and 2024.
|
||||
Create three new folders: `_publications/2013/`, `_publications/2017/`, and `_publications/2019/`.
|
||||
In those folders, create the following three Markdown files.
|
||||
3. Strictly follow this file format
|
||||
Important: you must strictly follow the YAML Front Matter format below, and must not invent new field names:
|
||||
- title: "paper title"
|
||||
- date: YYYY-MM-DD HH:MM:SS +0800
|
||||
- selected: true
|
||||
- pub: "venue / journal name"
|
||||
- pub_date: "year"
|
||||
- abstract: >- abstract content...
|
||||
- cover: /assets/images/covers/cover_name.jpg
|
||||
- authors: - Author1 - Author2
|
||||
- links: Paper: https://paper-link
|
||||
4. Please generate the full code for the following three files (including the path descriptions):
|
||||
(1) Path: `_publications/2013/2013-hyperloop.md`
|
||||
Title: Hyperloop Alpha
|
||||
Date: 2013-08-12
|
||||
Pub: Tesla Blog (Open Source)
|
||||
Pub_date: "2013"
|
||||
Abstract: A proposal for a fifth mode of transport, utilizing a low-pressure tube and air bearings to achieve subsonic speeds.
|
||||
cover: /assets/images/covers/Hyperloop_Alpha_sketch.jpg
|
||||
Authors: Elon Musk, SpaceX & Tesla Teams
|
||||
Link: https://www.tesla.com/sites/default/files/blog_images/hyperloop-alpha.pdf
|
||||
(2) Path: `_publications/2017/2017-mars.md`
|
||||
Title: Making Humans a Multi-Planetary Species
|
||||
Date: 2017-06-01
|
||||
Pub: New Space
|
||||
Pub_date: "2017"
|
||||
Abstract: Detailed architecture of the Starship system designed to colonize Mars. This paper outlines the technical challenges to establish a self-sustaining city.
|
||||
cover: /assets/images/covers/SpaceX_Starship.jpg
|
||||
Authors: Elon Musk
|
||||
Link: https://www.liebertpub.com/doi/10.1089/space.2017.29009.emu
|
||||
(3) Path: `_publications/2019/2019-neuralink.md`
|
||||
Title: An Integrated Brain-Machine Interface Platform
|
||||
Date: 2019-10-16
|
||||
Pub: Journal of Medical Internet Research
|
||||
Pub_date: "2019"
|
||||
Abstract: We have built arrays of small and flexible electrode threads, with as many as 3,072 electrodes per array, and a neurosurgical robot.
|
||||
cover: /assets/images/covers/Neuralink_sewing_machine_robot.jpg
|
||||
Authors: Elon Musk, Neuralink
|
||||
Link: https://www.jmir.org/2019/10/e16194/
|
||||
Execution requirement:
|
||||
Please directly provide the complete content of these three files, and also provide the modification code for the file where you changed the title.
|
||||
```
|
||||
|
||||
**Step 3: Refresh the local browser**
|
||||
|
||||
When the build completes, you will find that the originally dull publication list has turned into a futuristic black-tech showcase.
|
||||
|
||||

|
||||
|
||||
### 4.2.3 Final polish: social links and avatar
|
||||
|
||||
This is the key step for moving from a score of 90 to a score of 100. The sidebar may still contain the template's original GitHub link or an incorrect email. We need to point them to Musk's real social accounts, mainly X.com.
|
||||
|
||||
**Step 1: Preparation**
|
||||
|
||||
Search Google for a good-looking photo of Musk, save it as `portrait.png`, and drag it into the `images/photo` folder in Trae to replace the original image.
|
||||
|
||||
**Step 2: Copy the following prompt into Trae**
|
||||
|
||||
```text
|
||||
1. Role setting: you are a detail-oriented Jekyll website development expert.
|
||||
2. Task goal: complete the final update of the website sidebar and personal information configuration. We need to update the author's avatar, intro, and social links to Elon Musk's real information.
|
||||
Please first scan the project structure and find the configuration file that controls the author information.
|
||||
3. Please make the following modifications:
|
||||
1. Avatar path fix
|
||||
I have already uploaded a new image named `portrait.png` into the `images/` or `assets/images/` folder.
|
||||
Please modify the avatar path in the configuration file to point to this image, and ensure the relative path is correct, for example `/images/portrait.png`.
|
||||
2. Social link cleanup
|
||||
Please update or remove the social icon links in the sidebar:
|
||||
Email: change it to `elon@spacex.com`, or if the field allows, comment it out or remove it to avoid harassment.
|
||||
Twitter / X: change it to `https://x.com/elonmusk` (this is the core link).
|
||||
GitHub: change it to `https://github.com/tesla` to point to the Tesla open-source repository, or remove it directly.
|
||||
Google Scholar: must be removed, because he does not maintain it.
|
||||
LinkedIn / ResearchGate: if they exist, remove them all.
|
||||
Output requirement:
|
||||
Please directly provide the complete modified configuration code snippet.
|
||||
```
|
||||
|
||||
**Step 3: Refresh the local browser**
|
||||
|
||||
1. Look at the sidebar. Is it now using that handsome photo? Does clicking the Twitter icon take you to X.com?
|
||||
|
||||
At this point, locally, you already have a complete, professional, and distinctly Musk-style personal academic homepage.
|
||||
|
||||

|
||||
|
||||
## 4.3 Injecting soul through UI customization, the style part
|
||||
|
||||
Right now the content is correct, but the page still looks like a printed resume. It lacks a sense of technology. In Vibe Coding mode, we do not need to understand CSS. We only need to describe to the AI the **feeling** we want.
|
||||
|
||||
**Example scenario**:
|
||||
If you think the gray background is too dull and want to change it to **Mars red**, just ask Trae:
|
||||
*"I want to change the background color of the sidebar to dark red (#8B0000) to reflect the feeling of Mars. Which CSS or SCSS file should I modify? Please give me the code directly."*
|
||||
|
||||

|
||||
|
||||
If you like the **SpaceX Dashboard** style in the example image above, you can directly copy the following designer-level prompt:
|
||||
|
||||
```text
|
||||
1. Role setting: you are a top UI designer who admires the “Swiss International Style” and is good at designing interfaces like those of Notion, Linear, or Apple.
|
||||
2. Task goal: please completely rewrite the CSS / SCSS to create a “SpaceX Dashboard” style minimalist academic homepage. The core keywords are: transparent, restrained, precise.
|
||||
3. Please apply the following concrete style overrides:
|
||||
1. Global typography
|
||||
Font: abandon the original serif font. Force the whole site to use the system-level sans-serif stack:
|
||||
'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif.
|
||||
Line height: increase breathing room in the body text with `line-height: 1.75`.
|
||||
Colors:
|
||||
Main title: #111111
|
||||
Body text: #333333
|
||||
Secondary information such as dates or citations: #666666
|
||||
2. Clean header
|
||||
Background: remove the previous black background and use pure white (#FFFFFF), or translucent white with blur if supported, for example `rgba(255, 255, 255, 0.9)` plus `backdrop-filter: blur(10px)`.
|
||||
Border: keep only a very thin bottom border, `border-bottom: 1px solid #EAEAEA`.
|
||||
Text: navigation links should use dark gray #333333, and only become black and bold on hover.
|
||||
3. Remove cards and return to content
|
||||
Remove the background and shadow of the left sidebar and the About me cards (`box-shadow: none`, `background: transparent`).
|
||||
Great minimalism lets the text float directly on the page background.
|
||||
Increase spacing: significantly increase `margin-bottom`, for example 80px, between sections and use whitespace instead of borders to separate content.
|
||||
4. Restrained use of brand color
|
||||
Use Tesla Red (#E82127) only on links and important buttons.
|
||||
Link style: remove underline and only change color. On hover, add a light red background block such as `background: rgba(232, 33, 39, 0.05)`.
|
||||
5. Avatar tuning
|
||||
Keep it circular with `border-radius: 50%`.
|
||||
Remove the border.
|
||||
Keep only a very light shadow, such as `box-shadow: 0 10px 30px rgba(0,0,0,0.08)`.
|
||||
Execution requirement:
|
||||
Please analyze the `_sass` or CSS files. Do not patch the old code. Instead, directly provide the code that resets and overrides the styles above.
|
||||
```
|
||||
|
||||
## 4.4 Replace it with your own information, the customization part
|
||||
|
||||
Congratulations. After going through the Musk homepage flow above, you have already mastered the core mindset of Vibe Coding for site building. Turning this sample room into your own home is actually easy now.
|
||||
|
||||
You do not need to start over. You only need to repeat the steps above, but with a slightly more flexible strategy:
|
||||
|
||||
**Step 1: Physical replacement, avatar and basic information**
|
||||
|
||||
This is the easiest step:
|
||||
|
||||
1. **Change the photo**: in the file panel on the left side of Trae, find `assets/images/` and drag your own headshot there, replacing `portrait.png`.
|
||||
2. **Change the name**: tell Trae, "Replace all instances of Elon Musk across the entire site with [your name]."
|
||||
|
||||
**Step 2: AI preprocessing, let ChatGPT / Gemini help organize the content**
|
||||
|
||||
Trae is good at writing code, but if you directly throw a messy PDF resume at it, it may get confused.
|
||||
|
||||
**So a more efficient approach is this**:
|
||||
first use an AI that is strong at handling long text, such as ChatGPT, Gemini, or Kimi, to help you **cleanly format** the resume.
|
||||
|
||||
You can send ChatGPT a prompt like this:
|
||||
|
||||
```text
|
||||
Role setting: you are a professional academic website content planner.
|
||||
Task goal:
|
||||
I will send you my personal resume / CV. Please help me extract key information from it and organize it into a clear Markdown structure suitable for filling directly into a static website.
|
||||
Please strictly organize and refine it into the following five modules. If some content does not exist, leave it blank.
|
||||
1. Profile
|
||||
Name: my full name.
|
||||
Tagline: a one-line professional tag, for example “CS Student @ XX Univ | AI Enthusiast”.
|
||||
Bio: a 50 to 100 word third-person introduction summarizing my background and core skills, in a professional academic tone.
|
||||
Socials: extract email, GitHub, LinkedIn, blog links, and so on.
|
||||
2. Education
|
||||
Please list: school name, degree such as B.S. in CS, and time range.
|
||||
Optional: if GPA or core courses are available, add them on a separate line.
|
||||
3. Selected Projects — important
|
||||
Please extract 2 to 3 strongest projects, and for each include:
|
||||
Title: project name.
|
||||
Tech Stack: technologies used, such as Python, React, PyTorch.
|
||||
TL;DR: a one-line summary of what the project does.
|
||||
Description: 2 to 3 core contributions, refined using STAR style.
|
||||
Image Placeholder: reserve an image filename such as `project_name.jpg`.
|
||||
4. Publications / Articles
|
||||
If there are papers or technical articles, please extract:
|
||||
Title
|
||||
Venue
|
||||
Date, year is enough
|
||||
Abstract, one-sentence summary
|
||||
5. Skills
|
||||
Please organize them into categories: programming languages, frameworks / tools, and other skills.
|
||||
Output requirement:
|
||||
Do not explain the process. Directly output the cleaned Markdown content.
|
||||
```
|
||||
|
||||
Once you get this cleaned text, feed it into Trae, and the accuracy will improve dramatically.
|
||||
|
||||

|
||||

|
||||
|
||||
**Step 3: Replace the core content, with two possible routes**
|
||||
|
||||
At this step, depending on your preference, you can choose between two different Vibe Coding modes:
|
||||
|
||||
1. **Mode A: let AI navigate, then edit manually**
|
||||
|
||||
If you want to know exactly where everything is changed, you can ask Trae:
|
||||
|
||||
```markdown
|
||||
I want to modify the “Education” section. Please tell me where the corresponding file path is and which lines contain the code.
|
||||
```
|
||||
|
||||
Trae will tell you in the chat something like:
|
||||
"The file you need to modify is `_pages/about.md`, and the relevant code is around line XX..."
|
||||
|
||||
You can then open that file yourself from the file tree on the left and fill in the cleaned content from ChatGPT like a structured editing exercise.
|
||||
|
||||

|
||||
|
||||
2. **Mode B: fully managed automation**
|
||||
|
||||
If you think finding files is too troublesome, directly paste your cleaned information into Trae:
|
||||
|
||||
```markdown
|
||||
Here is the cleaned content for my “Education” and “Project Experience” sections (paste the Markdown content).
|
||||
Please directly replace the corresponding content in the current site and preserve the existing layout format.
|
||||
```
|
||||
|
||||
# 5. Deploy Online
|
||||
|
||||
## 5.1 Deploy to GitHub Pages
|
||||
|
||||
**Step 1: Enable GitHub Actions, the cloud build**
|
||||
|
||||
Back on GitHub in the browser:
|
||||
|
||||
1. Click **Settings** at the top of the repository.
|
||||
2. In the left sidebar, click **Pages**.
|
||||
3. Under **Build and deployment**, change **Source** from `Deploy from a branch` to **`GitHub Actions`**.
|
||||
|
||||

|
||||
|
||||
**Step 2: Automatically configure the Jekyll workflow**
|
||||
|
||||
After switching, the page layout changes. GitHub will automatically recognize that this is a Jekyll project.
|
||||
|
||||
1. Find the **Jekyll (By GitHub Actions)** card.
|
||||
2. Click **Configure** on that card.
|
||||
|
||||

|
||||
|
||||
**Step 3: Commit the configuration file**
|
||||
|
||||
After clicking, you will be taken to a page full of code. This is a `.yml` configuration file already written by GitHub for building a Jekyll site.
|
||||
|
||||
1. **Do not modify any code**.
|
||||
2. Click the green **Commit changes...** button in the upper right corner.
|
||||
3. In the pop-up confirmation box, click **Commit changes** again.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
**Step 4: Wait and verify**
|
||||
|
||||
After the commit, GitHub's servers start working automatically.
|
||||
|
||||
1. Click the **Actions** tab in the top menu.
|
||||
2. You will see a task named `Deploy Jekyll site to Pages` spinning.
|
||||
3. Wait one to two minutes until the yellow circle turns into a **green check mark**.
|
||||
|
||||

|
||||
|
||||
**Step 5: Visit your website**
|
||||
|
||||
Once the circle turns green, you can access the default version of the template through an address like:
|
||||
**`https://your-username.github.io/`**
|
||||
|
||||
Congratulations. You have now successfully deployed a personal academic homepage that is globally accessible.
|
||||
|
||||
## 5.2 Commit changes and update the homepage
|
||||
|
||||
Now we will push all the local modifications we made earlier to GitHub, so this Musk-style personal homepage can be seen by the world.
|
||||
|
||||
1. Click **Source Control** on the left.
|
||||
2. Add all the **changes** into **staged changes**.
|
||||
3. Let Trae help generate a commit message, then click **Commit**.
|
||||
4. Click **Sync Changes** or **Push** to push to the `main` branch.
|
||||
5. Wait a moment until all processes under the **Actions** tab complete.
|
||||
|
||||

|
||||
|
||||
Now, congratulations. Open **`https://your-username.github.io/`**, and you already have a complete, professional, and strongly Musk-flavored academic homepage.
|
||||
|
||||

|
||||
|
||||
# 6. Advanced Play: Hand-build a Personal Homepage from Scratch
|
||||
|
||||
If you think academic templates are too rigid, or if you want to make a one-page website as cool as *The Matrix*, welcome to the **DIY section**.
|
||||
|
||||
Here, we do not fork anyone else's code. We will use Trae, starting from an empty folder, and generate a complete website with a single instruction, then deploy it online.
|
||||
|
||||
## 6.1 Why build it by hand
|
||||
|
||||
* **Absolute freedom**: no template constraints. If you want the navigation bar on the right, or fireworks in the background, you only need to tell the AI.
|
||||
* **Minimalism**: templates often contain hundreds of files, while a hand-built website may need only one `index.html`.
|
||||
* **Technical control**: this is the best way to understand how a webpage actually runs.
|
||||
|
||||
We will demonstrate the classic **pure HTML flow**:
|
||||
no compilation required, and GitHub Pages supports it natively, which makes it ideal for building a personal landing page.
|
||||
|
||||
## 6.2 Practical example: ask AI to write a "Mars command center" homepage
|
||||
|
||||
This time we are not doing the academic route. Suppose Musk wants an extremely minimal, futuristic personal homepage to present his Mars plan.
|
||||
|
||||
**Step 1: Create an empty project**
|
||||
|
||||
Create a new folder on your computer and open it with Trae. At that moment, the file tree on the left is completely empty.
|
||||
|
||||
*(Tip: you can prepare a photo of Musk in advance and name it `portrait.png`.)*
|
||||
|
||||
**Step 2: Build the framework**
|
||||
|
||||
Enter the following prompt in Trae's chat panel. Note that we require AI to write all code into a single file so that it is easy for beginners to manage:
|
||||
|
||||
```text
|
||||
I want to build a minimalist personal homepage for Elon Musk from scratch, without any complex framework, using only HTML + CSS + JS.
|
||||
Design style: SpaceX dashboard style.
|
||||
Background: use deep space black (#000000), with starlight animation.
|
||||
Main accent color: use “Mars red” (#E82127).
|
||||
Font: use a monospace font stack to imitate the feel of a code terminal.
|
||||
Page content:
|
||||
Place Elon Musk's avatar in the center, circular, with a rotating border. The image path is `portrait.png`.
|
||||
Name: Elon Musk (Technoking of Tesla)
|
||||
Intro: "Occupying Mars... 99% Loading."
|
||||
At the bottom, put three glowing buttons linking to X (Twitter), SpaceX, and Tesla.
|
||||
Technical requirement:
|
||||
Please put all HTML structure, CSS styles, and JavaScript inside a single `index.html` file.
|
||||
Please generate the full code directly.
|
||||
```
|
||||
|
||||

|
||||
|
||||
**Step 3: Generate and preview**
|
||||
|
||||
In the previous step, Trae already helped us generate an `index.html` file. So how do we see its current effect?
|
||||
|
||||
Tell Trae in the chat:
|
||||
|
||||
```markdown
|
||||
Please help me start a local service to preview this webpage.
|
||||
```
|
||||
|
||||
You will receive a link such as `http://localhost:8000`. Copy and open it in the browser, and you will see a cool "Mars homepage," perhaps with stars twinkling in the background.
|
||||
|
||||

|
||||
|
||||
But we will notice that the current page is only a very cool landing page. As a complete personal homepage, it still has too little information and lacks the depth expected of an academic homepage. So based on this visual framework, we now continue to enrich it with academic-style information about Elon Musk.
|
||||
|
||||

|
||||
|
||||
**Step 4: Further improve the information**
|
||||
|
||||
We want Trae to keep the current Mars style, but restructure the page into something more like the academic template. We need to clearly tell it to move the existing elements to the left and create a new content area on the right for profile text and white papers, while keeping all newly added content in the same black-and-red cyberpunk style.
|
||||
|
||||
Copy the following prompt and send it to Trae:
|
||||
|
||||
```text
|
||||
Core principle:
|
||||
You must strictly preserve the current “SpaceX / Mars” design style, including pure black background, starlight decorations, red neon accent color, and monospace code-style font. Do not use the white background from the reference image.
|
||||
|
||||
Specific modification steps:
|
||||
1. Create a two-column layout
|
||||
Split the page into left and right columns. The left sidebar should take about 30% to 35% width, and the right content area should take about 65% to 70%.
|
||||
|
||||
2. Left sidebar - move the existing information
|
||||
Move all current elements from the original hero screen into the fixed left sidebar:
|
||||
- Avatar: keep Elon Musk's circular avatar.
|
||||
- Name and title: keep the red neon text “ELON MUSK” and “Technoking of Tesla”.
|
||||
- Loading bar: keep “Occupying Mars... 99% Loading” as the personal signature.
|
||||
- Social buttons: move the three red buttons, X, SPACE X, and TESLA, to the bottom of the left sidebar.
|
||||
|
||||
3. Right content area - add detailed information
|
||||
Add detailed personal introduction and achievements in the right area. All new body text should use white or light gray, while titles should use red neon emphasis. Please create the following sections:
|
||||
- About Me:
|
||||
Write a short introduction, for example: “Technology entrepreneur and engineer focused on multi-planetary expansion, sustainable energy, and artificial intelligence.”
|
||||
- Focus Areas:
|
||||
List Space Systems Engineering, Mars Colonization Architecture, Brain-Machine Interfaces.
|
||||
- Visionary Plans & White Papers:
|
||||
This is the key section. Refer to the list style in the example image, but convert it into a black-background style.
|
||||
Create a list displaying his important technical plans, using red borders or glow effects to distinguish each item.
|
||||
Item 1: “Making Humans a Multi-Planetary Species” (Starship Architecture, 2017).
|
||||
Item 2: “Hyperloop Alpha” (High-speed transportation proposal, 2013).
|
||||
Item 3: “Neuralink: An Integrated Brain-Machine Interface Platform” (2019).
|
||||
- Notable Achievements:
|
||||
Briefly list milestones such as:
|
||||
First private liquid-propellant rocket to reach orbit (Falcon 1)
|
||||
First reusable orbital-class rocket (Falcon 9)
|
||||
|
||||
4. Style detail requirements
|
||||
All section titles on the right, such as “About Me,” should use the same red glowing style as the “ELON MUSK” text on the left.
|
||||
Make sure the whole page remains responsive and preserves a good two-column layout on different screen sizes.
|
||||
```
|
||||
|
||||
Refresh the browser after that, and your cyberpunk academic page is complete. Of course, you can keep improving it according to your own preferences. As in the previous steps, you only need to tell Trae the goal clearly, and it will handle the tedious coding process for you.
|
||||
|
||||

|
||||
|
||||
## 6.3 How to deploy the hand-built site
|
||||
|
||||
Unlike the previous forked template, which came from someone else's repository, this project is newly created by you and does not yet have a corresponding GitHub location. We therefore need to bind it manually.
|
||||
|
||||
**Step 1: Create a new repository on GitHub**
|
||||
|
||||
1. Log in to GitHub in the browser.
|
||||
2. Click the **+** icon in the upper right, then **New repository**.
|
||||
|
||||

|
||||
|
||||
3. **Repository name**: enter `mars-link`, or any other name you like.
|
||||
|
||||
**Note**:
|
||||
If you have already used **`your-username.github.io`**, you cannot reuse that name here. You can choose another name, and GitHub will then generate a URL like **`your-username.github.io/mars-link`**.
|
||||
|
||||
4. **Public / Private**: choose **Public**.
|
||||
5. **Do not check "Add a README file"!**
|
||||
Leave the other options at their defaults.
|
||||
6. Click **Create repository**.
|
||||
|
||||

|
||||
|
||||
**Step 2: Push the local code to the cloud**
|
||||
|
||||
After creation, GitHub will take you to a page with a lot of code-looking content. Do not worry. We just need to copy the repository link shown on that page.
|
||||
|
||||

|
||||
|
||||
Go back to Trae and type in the chat:
|
||||
|
||||
```markdown
|
||||
I have created an empty repository on GitHub. The address is: https://github.com/your-username/mars-link.git (please replace this with the actual repository address you just created).
|
||||
Now please help me initialize the current local project as a Git repository and push the code to the `main` branch of this remote address.
|
||||
```
|
||||
|
||||
Trae will usually help execute the standard sequence below, and you may only need to click to run them:
|
||||
|
||||
1. `git init`
|
||||
2. `git add .` and `git commit -m "First commit"`
|
||||
3. `git branch -M main` and `git remote add origin [your address]`
|
||||
4. `git push -u origin main`
|
||||
|
||||
After Trae completes the push, go back to GitHub and refresh the page. Click the **Code** tab, and you will see that the code written in Trae has been successfully pushed into the repository.
|
||||
|
||||

|
||||
|
||||
**Step 3: Enable GitHub Pages**
|
||||
|
||||
After the code is pushed, the webpage will not appear automatically. We still need to turn on the switch manually:
|
||||
|
||||
1. Go back to the GitHub repository page and click **Settings** at the top.
|
||||
2. Click **Pages** in the left sidebar.
|
||||
3. Under **Build and deployment**:
|
||||
1. Set **Source** to `Deploy from a branch`.
|
||||
2. Set **Branch** to `main`, and choose `/(root)` as the folder.
|
||||
4. Click **Save**.
|
||||
|
||||

|
||||
|
||||
After you click Save, the webpage will not appear instantly. GitHub's backend works like a small robot factory. It needs around **1 to 2 minutes** to package your code, build it, and publish it to global servers.
|
||||
|
||||
Wait patiently and refresh the page. Under the big **GitHub Pages** heading, you will see a line with a URL similar to:
|
||||
**"Your site is live at `https://your-username.github.io/mars-link/`"**
|
||||
|
||||

|
||||
|
||||
Click it, and your Mars command center is online.
|
||||
|
||||

|
||||
|
||||
# 7. Final words
|
||||
|
||||
The tutorial is over. Now, when you look at the `.github.io` glowing in your browser's address bar, do you feel a little like you have planted a flag on the internet?
|
||||
|
||||
In this tutorial, we borrowed Elon Musk's persona and, as if snapping Lego bricks together, built a website that looks quite impressive. But this is only the beginning. The most charming part of Vibe Coding is not how much typing time it saves. It is that it **completely smashes the wall between “idea” and “reality.”**
|
||||
|
||||
In the past, you might have given up on showing a project because **you could not write CSS**.
|
||||
Now, the only limits left are your **imagination** and your **taste**.
|
||||
|
||||
**Do not let this site stay a “Musk-inspired clone.”**
|
||||
That Tesla link you used for practice and that Mars-colonization white paper are ultimately someone else's story. Your homepage should be your own name card in the digital world.
|
||||
|
||||
Go and put your first real project experience there.
|
||||
Go and publish your own unique thoughts on a technical topic.
|
||||
You can even put your favorite book list or your own photos on it.
|
||||
Thoughts that would get buried on WeChat Moments can stay here permanently.
|
||||
Passion that does not fit inside a resume can spread freely here.
|
||||
|
||||
Do not leave this plot empty.
|
||||
Go experiment. Go break it. Go rebuild it.
|
||||
Keep doing that until it grows into the shape you like most.
|
||||
|
||||

|
||||
|
||||
***Go ahead, and let the world see you.***
|
||||
|
||||
# References
|
||||
|
||||
CSDN: [2025 latest nanny-level tutorial: step by step on using GitHub to build a personal homepage](https://blog.csdn.net/qq_45743991/article/details/145505150?ops_request_misc=&request_id=&biz_id=102&utm_term=github%E6%9E%84%E5%BB%BA%E4%B8%AA%E4%BA%BA%E4%B8%BB%E9%A1%B5&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-0-145505150.142^v102^pc_search_result_base4&spm=1018.2226.3001.4187)
|
||||
|
||||
CSDN: [Git download and installation tutorial](https://blog.csdn.net/weixin_41293671/article/details/144255269?ops_request_misc=elastic_search_misc&request_id=63236900b52320a7beb177787ba97f07&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~baidu_landing_v2~default-5-144255269-null-null.142^v102^pc_search_result_base4&utm_term=git%E4%B8%8B%E8%BD%BD%E5%AE%89%E8%A3%85&spm=1018.2226.3001.4187)
|
||||
|
||||
CSDN: [Ruby installation tutorial under Windows](https://blog.csdn.net/alive_tree/article/details/103043158?ops_request_misc=elastic_search_misc&request_id=ad7e29ea7f702554d785c2fc82ec6e95&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~ElasticSearch~search_v2-11-103043158-null-null.142^v102^pc_search_result_base4&utm_term=ruby%E5%AE%89%E8%A3%85%E6%95%99%E7%A8%8B&spm=1018.2226.3001.4187)
|
||||
+82
-41
@@ -900,122 +900,139 @@
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/ai-capabilities/2.1-dify-knowledge-base/</loc>
|
||||
<lastmod>2026-02-27T21:58:38+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:44:49.470Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/ai-capabilities/2.1-dify-knowledge-base/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/ai-capabilities/2.1-dify-knowledge-base/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/assignments/2.1-fullstack-app/</loc>
|
||||
<lastmod>2026-01-13T20:09:45+08:00</lastmod>
|
||||
<lastmod>2026-03-24T07:00:25.007Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/assignments/2.1-fullstack-app/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/assignments/2.1-fullstack-app/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/assignments/2.2-modern-frontend-trae/</loc>
|
||||
<lastmod>2026-01-13T20:09:45+08:00</lastmod>
|
||||
<lastmod>2026-03-24T07:00:25.008Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/assignments/2.2-modern-frontend-trae/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/assignments/2.2-modern-frontend-trae/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.2-database-supabase/</loc>
|
||||
<lastmod>2026-03-18T09:34:06+08:00</lastmod>
|
||||
<lastmod>2026-03-24T10:17:02.326Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.2-database-supabase/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/backend/2.2-database-supabase/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.3-ai-interface-code/</loc>
|
||||
<lastmod>2026-03-02T16:17:11+08:00</lastmod>
|
||||
<lastmod>2026-03-23T15:51:59.282Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.3-ai-interface-code/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/backend/2.3-ai-interface-code/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.4-git-workflow/</loc>
|
||||
<lastmod>2026-02-27T18:45:52+08:00</lastmod>
|
||||
<lastmod>2026-03-24T10:08:14.884Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.4-git-workflow/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/backend/2.4-git-workflow/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.5-zeabur-deployment/</loc>
|
||||
<lastmod>2026-02-27T18:26:49+08:00</lastmod>
|
||||
<lastmod>2026-03-24T01:20:30.534Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.5-zeabur-deployment/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/backend/2.5-zeabur-deployment/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.6-modern-cli/</loc>
|
||||
<lastmod>2026-03-23T17:36:13+08:00</lastmod>
|
||||
<lastmod>2026-03-24T09:29:56.836Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.6-modern-cli/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/backend/2.6-modern-cli/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.7-stripe-payment/</loc>
|
||||
<lastmod>2026-02-10T17:23:27+08:00</lastmod>
|
||||
<lastmod>2026-03-24T07:00:25.008Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/backend/2.7-stripe-payment/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/backend/2.7-stripe-payment/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.0-lovart-assets/</loc>
|
||||
<lastmod>2026-03-16T12:42:29+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:28:32.292Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.0-lovart-assets/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.0-lovart-assets/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.1-figma-mastergo/</loc>
|
||||
<lastmod>2026-03-16T12:42:29+08:00</lastmod>
|
||||
<lastmod>2026-03-23T15:48:55.095Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.1-figma-mastergo/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.1-figma-mastergo/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.2-ui-design/</loc>
|
||||
<lastmod>2026-02-27T18:46:11+08:00</lastmod>
|
||||
<lastmod>2026-03-24T07:00:25.008Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.2-ui-design/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.2-ui-design/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.3-multi-product-ui/</loc>
|
||||
<lastmod>2026-02-27T18:46:11+08:00</lastmod>
|
||||
<lastmod>2026-03-24T05:49:47.526Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.3-multi-product-ui/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.3-multi-product-ui/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.4-llm-skills-beautiful/</loc>
|
||||
<lastmod>2026-03-01T12:28:47+08:00</lastmod>
|
||||
<lastmod>2026-03-24T09:24:24.080Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.4-llm-skills-beautiful/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.4-llm-skills-beautiful/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.5-hogwarts-portraits/</loc>
|
||||
<lastmod>2026-02-28T19:13:23+08:00</lastmod>
|
||||
<lastmod>2026-03-24T01:18:35.782Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.5-hogwarts-portraits/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.5-hogwarts-portraits/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.6-design-to-code/</loc>
|
||||
<lastmod>2026-02-28T19:12:09+08:00</lastmod>
|
||||
<lastmod>2026-03-23T15:51:59.282Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.6-design-to-code/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.6-design-to-code/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.7-modern-component-library/</loc>
|
||||
<lastmod>2026-03-02T20:11:08+08:00</lastmod>
|
||||
<lastmod>2026-03-24T10:12:29.711Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/frontend/2.7-modern-component-library/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-2/frontend/2.7-modern-component-library/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-2/</loc>
|
||||
@@ -1035,164 +1052,187 @@
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/ai-advanced/3.a1-rag-introduction/</loc>
|
||||
<lastmod>2026-02-16T09:21:33+08:00</lastmod>
|
||||
<lastmod>2026-03-24T07:30:22.563Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/ai-advanced/3.a1-rag-introduction/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/ai-advanced/3.a1-rag-introduction/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/ai-advanced/3.a2-langgraph-advanced-rag/</loc>
|
||||
<lastmod>2026-03-02T02:20:17+08:00</lastmod>
|
||||
<lastmod>2026-03-24T07:00:25.008Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/ai-advanced/3.a2-langgraph-advanced-rag/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/ai-advanced/3.a2-langgraph-advanced-rag/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/agent-teams/</loc>
|
||||
<lastmod>2026-03-14T22:17:12+08:00</lastmod>
|
||||
<lastmod>2026-03-24T16:19:04.205Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/agent-teams/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/agent-teams/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/basics/</loc>
|
||||
<lastmod>2026-03-23T17:36:13+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:39:26.659Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/basics/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/basics/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/claude-agent-sdk/</loc>
|
||||
<lastmod>2026-03-02T12:52:38+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:26:09.452Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/claude-agent-sdk/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/claude-agent-sdk/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/long-running-tasks/</loc>
|
||||
<lastmod>2026-03-18T17:22:35+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:29:07.591Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/long-running-tasks/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/long-running-tasks/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/mcp/</loc>
|
||||
<lastmod>2026-03-01T12:26:02+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:06:58.426Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/mcp/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/mcp/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/mobile-development/</loc>
|
||||
<lastmod>2026-03-02T14:26:06+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:26:46.026Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/mobile-development/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/mobile-development/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/skills/</loc>
|
||||
<lastmod>2026-03-02T10:29:08+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:27:00.403Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/skills/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/skills/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/spec-coding/</loc>
|
||||
<lastmod>2026-03-02T12:42:07+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:15:15.558Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/spec-coding/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/spec-coding/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/superpowers/</loc>
|
||||
<lastmod>2026-03-01T12:26:02+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:27:46.387Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/superpowers/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/superpowers/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/workflow/</loc>
|
||||
<lastmod>2026-03-02T16:17:11+08:00</lastmod>
|
||||
<lastmod>2026-03-24T10:34:29.162Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/core-skills/workflow/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/core-skills/workflow/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.1-choose-platform/</loc>
|
||||
<lastmod>2026-03-08T12:17:04+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:11:17.435Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.1-choose-platform/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.1-choose-platform/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.10-electron-voice-to-text/</loc>
|
||||
<lastmod>2026-02-28T15:00:32+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.078Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.10-electron-voice-to-text/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.10-electron-voice-to-text/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.11-nft-minting/</loc>
|
||||
<lastmod>2026-02-28T15:00:32+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.087Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.11-nft-minting/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.11-nft-minting/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.12-vscode-extension/</loc>
|
||||
<lastmod>2026-02-28T15:00:32+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.099Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.12-vscode-extension/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.12-vscode-extension/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.13-qt-industrial-hmi/</loc>
|
||||
<lastmod>2026-02-28T15:00:32+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.106Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.13-qt-industrial-hmi/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.13-qt-industrial-hmi/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.3-wechat-miniprogram/</loc>
|
||||
<lastmod>2026-03-14T15:42:54+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.025Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.3-wechat-miniprogram/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.3-wechat-miniprogram/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.4-wechat-miniprogram-backend/</loc>
|
||||
<lastmod>2026-03-02T02:20:17+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:08:37.178Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.4-wechat-miniprogram-backend/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.4-wechat-miniprogram-backend/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.5-android-app/</loc>
|
||||
<lastmod>2026-02-15T09:24:02+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.033Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.5-android-app/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.5-android-app/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.6-ios-app/</loc>
|
||||
<lastmod>2026-02-15T09:24:02+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.049Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.6-ios-app/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.6-ios-app/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.8-pwa-local-app/</loc>
|
||||
<lastmod>2026-03-07T23:02:24+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.061Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.8-pwa-local-app/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.8-pwa-local-app/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.9-browser-ai-extension/</loc>
|
||||
<lastmod>2026-03-05T01:27:15+08:00</lastmod>
|
||||
<lastmod>2026-03-24T06:30:57.068Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/cross-platform/3.9-browser-ai-extension/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/cross-platform/3.9-browser-ai-extension/"/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/</loc>
|
||||
@@ -1212,9 +1252,10 @@
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/personal-brand/3.7-personal-website-blog/</loc>
|
||||
<lastmod>2026-02-15T09:24:02+08:00</lastmod>
|
||||
<lastmod>2026-03-24T08:04:35.919Z</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
<xhtml:link rel="alternate" hreflang="zh-CN" href="https://datawhalechina.github.io/easy-vibe/zh-cn/stage-3/personal-brand/3.7-personal-website-blog/"/>
|
||||
<xhtml:link rel="alternate" hreflang="en" href="https://datawhalechina.github.io/easy-vibe/en/stage-3/personal-brand/3.7-personal-website-blog/"/>
|
||||
</url>
|
||||
</urlset>
|
||||
|
||||
Reference in New Issue
Block a user