{
  "updatedAt": "2026-05-23T05:38:09.183Z",
  "date": "2026-05-23",
  "count": 60,
  "items": [
    {
      "arxivId": "2605.20025",
      "title": "AutoResearchClaw: Self-Reinforcing Autonomous Research with Human-AI Collaboration",
      "summary": "AutoResearchClaw is a multi-agent autonomous research system that improves scientific discovery through structured debate, self-healing execution, verifiable reporting, human collaboration, and evolutionary learning, outperforming previous systems on a benchmark while maintaining human oversight.",
      "authors": [
        "Jiaqi Liu",
        "Shi Qiu",
        "Mairui Li",
        "Bingzhou Li",
        "Haonian Ji",
        "Siwei Han"
      ],
      "organization": null,
      "publishedAt": "2026-05-19T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 115,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.20025.png",
      "arxivUrl": "https://arxiv.org/abs/2605.20025",
      "pdfUrl": "https://arxiv.org/pdf/2605.20025.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.20025",
      "githubRepo": "https://github.com/aiming-lab/AutoResearchClaw",
      "githubStars": 12503,
      "keywords": [
        "multi-agent autonomous research",
        "structured multi-agent debate",
        "self-healing executor",
        "Pivot/Refine decision loop",
        "verifiable result reporting",
        "human-in-the-loop collaboration"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "AutoResearchClaw：基于人类-AI协作的自增强自主研究",
      "summary_zh": "通过结构化辩论和进化学习提升科学发现，优于现有系统并保持人类监督",
      "title_i18n": {
        "en": "AutoResearchClaw: Self-Reinforcing Autonomous Research with Human-AI Collaboration",
        "zh-CN": "AutoResearchClaw：基于人类-AI协作的自增强自主研究",
        "ja": "AutoResearchClaw: 人間とAIの協働による自己強化型自律研究",
        "ko": "AutoResearchClaw: Self-Reinforcing Autonomous Research with Human-AI Collaboration",
        "es": "AutoResearchClaw: Investigación Autónoma Autorreforzada con Colaboración Humano-IA",
        "de": "AutoResearchClaw: Self-Reinforcing Autonomous Research with Human-AI Collaboration"
      },
      "summary_i18n": {
        "en": "AutoResearchClaw is a multi-agent autonomous research system that improves scientific discovery through structured debate, self-healing execution, verifiable reporting, human collaboration, and evolutionary learning, outperforming previous systems on a benchmark while maintaining human oversight.",
        "zh-CN": "通过结构化辩论和进化学习提升科学发现，优于现有系统并保持人类监督",
        "ja": "AutoResearchClawは構造的な議論、自己修復実行、検証可能な報告、人間との協働、進化的学習を通じて科学的発見を改善するマルチエージェントの自律研究システムである。",
        "ko": "AutoResearchClaw는 구조화된 논쟁과 자가 치유 실행을 통해 과학적 발견을 개선하는 다중 에이전트 시스템입니다.",
        "es": "AutoResearchClaw es un sistema de investigación autónomo multiagente que mejora el descubrimiento científico mediante debates estructurados y aprendizaje evolutivo, superando sistemas anteriores en una prueba.",
        "de": "AutoResearchClaw ist ein Multi-Agenten-System für autonome Forschung, das wissenschaftliche Entdeckungen durch strukturierte Debatte verbessert und menschliche Überwachung gewährleistet."
      }
    },
    {
      "arxivId": "2605.22355",
      "title": "TransitLM: A Large-Scale Dataset and Benchmark for Map-Free Transit Route Generation",
      "summary": "TransitLM dataset enables end-to-end transit route planning using large language models trained on structured transit data, eliminating the need for traditional map-based approaches.",
      "authors": [
        "Hanyu Guo",
        "Jiedong Yang",
        "Chao Chen",
        "Longfei Xu",
        "Kaikui Liu",
        "Xiangxiang Chu"
      ],
      "organization": {
        "_id": "67d11771890254196d3174e5",
        "name": "GD-ML",
        "fullname": "AMAP-ML",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/67d116c47be76de1a40873ca/s5ukAx9E36ZZIKvbpBRi4.png"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 165,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.22355.png",
      "arxivUrl": "https://arxiv.org/abs/2605.22355",
      "pdfUrl": "https://arxiv.org/pdf/2605.22355.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.22355",
      "githubRepo": "https://github.com/HotTricker/TransitLM",
      "githubStars": 109,
      "keywords": [
        "large language models",
        "transit route planning",
        "structured map infrastructure",
        "routing engines",
        "continual pre-training",
        "evaluation tasks"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "TransitLM：无地图公交路线生成的大规模数据集与基准",
      "summary_zh": "使用结构化交通数据训练大语言模型，实现端到端公交路线生成，无需传统地图方法",
      "title_i18n": {
        "en": "TransitLM: A Large-Scale Dataset and Benchmark for Map-Free Transit Route Generation",
        "zh-CN": "TransitLM：无地图公交路线生成的大规模数据集与基准",
        "ja": "TransitLM: マップフリーな輸送ルート生成のための大規模データセットとベンチマーク",
        "ko": "TransitLM: A Large-Scale Dataset and Benchmark for Map-Free Transit Route Generation",
        "es": "TransitLM: Un conjunto de datos y evaluación a gran escala para generación de rutas de transporte sin mapas",
        "de": "TransitLM: A Large-Scale Dataset and Benchmark for Map-Free Transit Route Generation"
      },
      "summary_i18n": {
        "en": "TransitLM dataset enables end-to-end transit route planning using large language models trained on structured transit data, eliminating the need for traditional map-based approaches.",
        "zh-CN": "使用结构化交通数据训练大语言模型，实现端到端公交路线生成，无需传统地图方法",
        "ja": "TransitLMデータセットは構造化された輸送データで訓練された大規模言語モデルを使用して、従来のマップベースのアプローチを必要とせずにエンドツーエンドの輸送ルート計画を可能にする。",
        "ko": "TransitLM 데이터세트는 구조화된 교통 데이터로 훈련된 대규모 언어 모델을 사용하여 지도 기반 접근법 없이 종단간 교통 경로 계획을 가능하게 합니다.",
        "es": "El conjunto de datos TransitLM permite planificación de rutas de transporte mediante modelos de lenguaje grande entrenados en datos estructurados de transporte, eliminando la necesidad de enfoques basados en mapas.",
        "de": "Der TransitLM-Datensatz ermöglicht die End-to-End-Planung von Verkehrsverbindungen mit großen Sprachmodellen, ohne Karten zu benötigen."
      }
    },
    {
      "arxivId": "2605.12882",
      "title": "CiteVQA: Benchmarking Evidence Attribution for Trustworthy Document Intelligence",
      "summary": "CiteVQA introduces a benchmark for document vision-language models that evaluates both answer accuracy and correct citation of supporting evidence, revealing significant attribution hallucinations in current models.",
      "authors": [
        "Dongsheng Ma",
        "Jiayu Li",
        "Zhengren Wang",
        "Yijie Wang",
        "Jiahao Kong",
        "Weijun Zeng"
      ],
      "organization": {
        "_id": "66ce9d1f5e180b9b9c8e6f31",
        "name": "opendatalab",
        "fullname": "OpenDataLab",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/639c3afa7432f2f5d16b7296/yqxxBknyeqkGnYsjoaR4M.png"
      },
      "publishedAt": "2026-05-13T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 261,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.12882.png",
      "arxivUrl": "https://arxiv.org/abs/2605.12882",
      "pdfUrl": "https://arxiv.org/pdf/2605.12882.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.12882",
      "githubRepo": "https://github.com/opendatalab/CiteVQA",
      "githubStars": 64,
      "keywords": [
        "Multimodal Large Language Models",
        "Doc-VQA",
        "document understanding",
        "bounding-box citations",
        "Strict Attributed Accuracy",
        "Attribution Hallucination"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "CiteVQA：可信文档智能的证据归属基准",
      "summary_zh": "构建CiteVQA基准，评估文档视觉-语言模型的答案准确性和引用正确性，发现当前模型存在显著的归属幻觉。",
      "title_i18n": {
        "en": "CiteVQA: Benchmarking Evidence Attribution for Trustworthy Document Intelligence",
        "zh-CN": "CiteVQA：可信文档智能的证据归属基准",
        "ja": "CiteVQA: 信頼性のあるドキュメントインテリジェンスのための証拠属性付けのベンチマーク",
        "ko": "CiteVQA: Benchmarking Evidence Attribution for Trustworthy Document Intelligence",
        "es": "CiteVQA: Evaluación de atribución de evidencia para inteligencia documental confiable",
        "de": "CiteVQA: Benchmarking Evidence Attribution for Trustworthy Document Intelligence"
      },
      "summary_i18n": {
        "en": "CiteVQA introduces a benchmark for document vision-language models that evaluates both answer accuracy and correct citation of supporting evidence, revealing significant attribution hallucinations in current models.",
        "zh-CN": "构建CiteVQA基准，评估文档视觉-语言模型的答案准确性和引用正确性，发现当前模型存在显著的归属幻觉。",
        "ja": "CiteVQAは、回答の正確さとサポートされる証拠の正しい引用を評価するドキュメントビジョン-言語モデルのベンチマークを導入し、現在のモデルにおける著しい属性の幻覚を明らかにした。",
        "ko": "CiteVQA는 문서 비전-언어 모델의 정확한 답변과 지원 증거의 올바른 인용을 평가하는 벤치마크를 제시합니다.",
        "es": "CiteVQA introduce una evaluación para modelos de visión-lenguaje documental que evalúa precisión de respuestas y citación correcta de evidencia, revelando alucinaciones en modelos actuales.",
        "de": "CiteVQA stellt einen Benchmark für Dokumenten-Vision-Language-Modelle vor, der Antwortgenauigkeit und Zitierungen bewertet und erhebliche Halluzinationen aufzeigt."
      }
    },
    {
      "arxivId": "2605.18747",
      "title": "Code as Agent Harness",
      "summary": "Large language models are increasingly used as operational substrates for agent reasoning and execution in agentic systems, with code serving as a unified infrastructure layer across multiple domains and applications.",
      "authors": [
        "Xuying Ning",
        "Katherine Tieu",
        "Dongqi Fu",
        "Tianxin Wei",
        "Zihao Li",
        "Yuanchen Bei"
      ],
      "organization": null,
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 196,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18747.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18747",
      "pdfUrl": "https://arxiv.org/pdf/2605.18747.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18747",
      "githubRepo": "https://github.com/YennNing/Awesome-Code-as-Agent-Harness-Papers",
      "githubStars": 198,
      "keywords": [
        "large language models",
        "agentic systems",
        "agent harness",
        "code as agent harness",
        "agent infrastructure",
        "reasoning"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "代码作为代理的架构",
      "summary_zh": "将代码作为统一基础设施层，用于代理推理与执行。",
      "title_i18n": {
        "en": "Code as Agent Harness",
        "zh-CN": "代码作为代理的架构",
        "ja": "Code as Agent Harness",
        "ko": "Code as Agent Harness",
        "es": "Código como Arnés de Agente",
        "de": "Code as Agent Harness"
      },
      "summary_i18n": {
        "en": "Large language models are increasingly used as operational substrates for agent reasoning and execution in agentic systems, with code serving as a unified infrastructure layer across multiple domains and applications.",
        "zh-CN": "将代码作为统一基础设施层，用于代理推理与执行。",
        "ja": "エージェントの推論と実行のための運用基盤としての大型言語モデルの使用が増加しており、コードは複数のドメインとアプリケーションにおいて統一されたインフラ層となる。",
        "ko": "대규모 언어 모델은 에이전트 시스템에서 운영 기반으로 사용되며, 코드는 다양한 도메인과 애플리케이션에서 통합 인프라 계층으로 작동합니다.",
        "es": "Los modelos de lenguaje grande se usan cada vez más como sustrato operativo para razonamiento y ejecución de agentes en sistemas agentes, con código como capa de infraestructura unificada.",
        "de": "Große Sprachmodelle werden als Grundlage für Agenten-Reasoning in agentischen Systemen verwendet, wobei Code als einheitliche Infrastrukturschicht dient."
      }
    },
    {
      "arxivId": "2605.18739",
      "title": "LongLive-2.0: An NVFP4 Parallel Infrastructure for Long Video Generation",
      "summary": "LongLive-2.0 presents an NVFP4-based parallel infrastructure for long video generation that addresses training and inference bottlenecks through sequence-parallel autoregressive training and diffusion model tuning.",
      "authors": [
        "Yukang Chen",
        "Luozhou Wang",
        "Wei Huang",
        "Shuai Yang",
        "Bohan Zhang",
        "Yicheng Xiao"
      ],
      "organization": {
        "_id": "60262b67268c201cdc8b7d43",
        "name": "nvidia",
        "fullname": "NVIDIA",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/65df9200dc3292a8983e5017/Vs5FPVCH-VZBipV3qKTuy.png"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 108,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18739.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18739",
      "pdfUrl": "https://arxiv.org/pdf/2605.18739.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18739",
      "githubRepo": "https://github.com/NVlabs/LongLive",
      "githubStars": 1644,
      "keywords": [
        "NVFP4",
        "sequence-parallel autoregressive training",
        "Balanced SP",
        "teacher-forcing layout",
        "VAE encoding",
        "diffusion model"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "LongLive-2.0：基于NVFP4的长视频生成并行架构",
      "summary_zh": "提出基于NVFP4的并行架构，通过序列并行自回归训练和扩散模型调优解决长视频生成的训练与推理瓶颈",
      "title_i18n": {
        "en": "LongLive-2.0: An NVFP4 Parallel Infrastructure for Long Video Generation",
        "zh-CN": "LongLive-2.0：基于NVFP4的长视频生成并行架构",
        "ja": "LongLive-2.0: 長い動画生成のためのNVFP4並列インフラストラクチャ",
        "ko": "LongLive-2.0: An NVFP4 Parallel Infrastructure for Long Video Generation",
        "es": "LongLive-2.0: Infraestructura NVFP4 Paralela para Generación de Vídeos Largos",
        "de": "LongLive-2.0: An NVFP4 Parallel Infrastructure for Long Video Generation"
      },
      "summary_i18n": {
        "en": "LongLive-2.0 presents an NVFP4-based parallel infrastructure for long video generation that addresses training and inference bottlenecks through sequence-parallel autoregressive training and diffusion model tuning.",
        "zh-CN": "提出基于NVFP4的并行架构，通过序列并行自回归训练和扩散模型调优解决长视频生成的训练与推理瓶颈",
        "ja": "LongLive-2.0は、シーケンス並列の自己回帰トレーニングと拡散モデルチューニングを通じて、トレーニングおよび推論のボトルネックを解決するNVFP4に基づく並列インフラストラクチャを提示する。",
        "ko": "LongLive-2.0은 시퀀스 병렬 자기 회귀 훈련 및 확산 모델 튜닝을 통해 긴 영상 생성을 위한 NVFP4 기반 병렬 인프라를 제시합니다.",
        "es": "LongLive-2.0 presenta una infraestructura paralela basada en NVFP4 para generación de videos largos, abordando cuellos de botella de entrenamiento e inferencia mediante entrenamiento autoregresivo secuencial.",
        "de": "LongLive-2.0 präsentiert eine parallele Infrastruktur für lange Videogenerierung, die Trainings- und Inferenzprobleme löst."
      }
    },
    {
      "arxivId": "2605.19833",
      "title": "Mega-ASR: Towards In-the-wild^2 Speech Recognition via Scaling up Real-world Acoustic Simulation",
      "summary": "Mega-ASR framework improves robustness in real-world speech recognition through compound-data construction and progressive acoustic-to-semantic optimization techniques.",
      "authors": [
        "Zhifei Xie",
        "Kaiyu Pang",
        "Haobin Zhang",
        "Deheng Ye",
        "Xiaobin Hu",
        "Shuicheng Yan"
      ],
      "organization": {
        "_id": "6508ab2b349930913196378b",
        "name": "NationalUniversityofSingapore",
        "fullname": "National University of Singapore",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/630ca0817dacb93b33506ce7/ZYUmpSMsa5Whihw3me2Bw.png"
      },
      "publishedAt": "2026-05-19T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 125,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.19833.png",
      "arxivUrl": "https://arxiv.org/abs/2605.19833",
      "pdfUrl": "https://arxiv.org/pdf/2605.19833.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.19833",
      "githubRepo": "https://github.com/xzf-thu/Mega-ASR",
      "githubStars": 319,
      "keywords": [
        "automatic speech recognition",
        "acoustic robustness bottleneck",
        "compound-data construction",
        "progressive acoustic-to-semantic optimization",
        "Acoustic-to-Semantic Progressive Supervised Fine-Tuning",
        "Dual-Granularity WER-Gated Policy Optimization"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Mega-ASR：通过真实声学模拟扩展实现野外语音识别",
      "summary_zh": "Mega-ASR通过构建复合数据和渐进声学到语义优化提升真实场景下的语音识别鲁棒性。",
      "title_i18n": {
        "en": "Mega-ASR: Towards In-the-wild^2 Speech Recognition via Scaling up Real-world Acoustic Simulation",
        "zh-CN": "Mega-ASR：通过真实声学模拟扩展实现野外语音识别",
        "ja": "Mega-ASR: 実世界の音響シミュレーションの拡大によるIn-the-wild^2音声認識への道",
        "ko": "Mega-ASR: Towards In-the-wild^2 Speech Recognition via Scaling up Real-world Acoustic Simulation",
        "es": "Mega-ASR: Reconocimiento de voz en el mundo real^2 mediante simulación acústica real",
        "de": "Mega-ASR: Towards In-the-wild^2 Speech Recognition via Scaling up Real-world Acoustic Simulation"
      },
      "summary_i18n": {
        "en": "Mega-ASR framework improves robustness in real-world speech recognition through compound-data construction and progressive acoustic-to-semantic optimization techniques.",
        "zh-CN": "Mega-ASR通过构建复合数据和渐进声学到语义优化提升真实场景下的语音识别鲁棒性。",
        "ja": "Mega-ASRフレームワークは、複合データ構築と段階的な音響-意味最適化技術を通じて、現実世界の音声認識のロバスト性を向上させる。",
        "ko": "Mega-ASR 프레임워크는 복합 데이터 구성 및 점진적인 음향-의미 최적화 기술을 통해 실제 환경에서의 음성 인식을 개선합니다.",
        "es": "El marco Mega-ASR mejora la robustez en reconocimiento de voz en el mundo real mediante construcción de datos compuestos y optimización progresiva acústico-semántica.",
        "de": "Mega-ASR verbessert Robustheit in Echtwelt-Spracherkennung durch Datenkonstruktion und akustische Optimierungstechniken."
      }
    },
    {
      "arxivId": "2605.18401",
      "title": "SkillsVote: Lifecycle Governance of Agent Skills from Collection, Recommendation to Evolution",
      "summary": "SkillsVote is a governance framework for long-horizon LLM agents that manages reusable skills through structured collection, recommendation, and evolution processes.",
      "authors": [
        "Hongyi Liu",
        "Haoyan Yang",
        "Tao Jiang",
        "Bo Tang",
        "Feiyu Xiong",
        "Zhiyu Li"
      ],
      "organization": {
        "_id": "658bf18c135580745c529d53",
        "name": "IAAR-Shanghai",
        "fullname": "Memtensor Research Group",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/62a155e615eeab266b2f2243/ONfcDR9Ox8AVtXtrAR4FG.png"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 122,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18401.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18401",
      "pdfUrl": "https://arxiv.org/pdf/2605.18401.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18401",
      "githubRepo": "https://github.com/MemTensor/skills-vote",
      "githubStars": 252,
      "keywords": [
        "Agent Skills",
        "experience schema",
        "executable scripts",
        "skill ecosystems",
        "lifecycle-governance framework",
        "environment requirements"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "SkillsVote：代理技能的生命周期治理框架",
      "summary_zh": "提出SkillsVote框架，通过结构化收集、推荐和进化过程管理长周期LLM代理的可复用技能",
      "title_i18n": {
        "en": "SkillsVote: Lifecycle Governance of Agent Skills from Collection, Recommendation to Evolution",
        "zh-CN": "SkillsVote：代理技能的生命周期治理框架",
        "ja": "SkillsVote: エージェントスキルのライフサイクルガバナンス（収集、推薦から進化まで）",
        "ko": "SkillsVote: Lifecycle Governance of Agent Skills from Collection, Recommendation to Evolution",
        "es": "SkillsVote: Gobernanza del ciclo de vida de habilidades de agente desde recolección, recomendación hasta evolución",
        "de": "SkillsVote: Lifecycle Governance of Agent Skills from Collection, Recommendation to Evolution"
      },
      "summary_i18n": {
        "en": "SkillsVote is a governance framework for long-horizon LLM agents that manages reusable skills through structured collection, recommendation, and evolution processes.",
        "zh-CN": "提出SkillsVote框架，通过结构化收集、推荐和进化过程管理长周期LLM代理的可复用技能",
        "ja": "SkillsVoteは長期的なLLMエージェントのスキルの管理を可能にするガバナンスフレームワークであり、構造的な収集、推薦、進化のプロセスを通じて再利用可能なスキルを管理する。",
        "ko": "SkillsVote는 재사용 가능한 기술을 구조화된 수집, 추천 및 진화 과정을 통해 관리하는 장기적 LLM 에이전트의 거버넌스 프레임워크입니다.",
        "es": "SkillsVote es un marco de gobernanza para agentes LLM de largo plazo que gestiona habilidades reutilizables mediante procesos estructurados de recolección, recomendación y evolución.",
        "de": "SkillsVote ist ein Governance-Framework für LLM-Agenten, das Wiederverwendbarkeit durch strukturierte Prozesse verwaltet."
      }
    },
    {
      "arxivId": "2605.13527",
      "title": "MMSkills: Towards Multimodal Skills for General Visual Agents",
      "summary": "Multimodal procedural knowledge frameworks enable visual agents to leverage external reusable skills through structured representations combining text, state cards, and visual keyframes, improving decision-making in complex environments.",
      "authors": [
        "Kangning Zhang",
        "Shuai Shao",
        "Qingyao Li",
        "Jianghao Lin",
        "Lingyue Fu",
        "Shijian Wang"
      ],
      "organization": {
        "_id": "63ec8ce89d77b7eb70568340",
        "name": "ShanghaiJiaotongUniversity",
        "fullname": "Shanghai Jiaotong University 1(NOT OFFICIAL)",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ec8c599d77b7eb70567d94/aD8jb0IbftwEH_V1kffGG.jpeg"
      },
      "publishedAt": "2026-05-14T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 116,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.13527.png",
      "arxivUrl": "https://arxiv.org/abs/2605.13527",
      "pdfUrl": "https://arxiv.org/pdf/2605.13527.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.13527",
      "githubRepo": "https://github.com/DeepExperience/MMSkills",
      "githubStars": 136,
      "keywords": [
        "multimodal procedural knowledge",
        "visual agents",
        "skill packages",
        "state-conditioned packages",
        "visual grounding",
        "agentic trajectory-to-skill Generator"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "MMSkills：面向通用视觉代理的多模态技能",
      "summary_zh": "构建多模态程序知识框架，通过文本、状态卡和视觉关键帧提升视觉代理决策能力",
      "title_i18n": {
        "en": "MMSkills: Towards Multimodal Skills for General Visual Agents",
        "zh-CN": "MMSkills：面向通用视觉代理的多模态技能",
        "ja": "MMSkills: 一般的な視覚エージェントのためのマルチモーダルスキル",
        "ko": "MMSkills: Towards Multimodal Skills for General Visual Agents",
        "es": "MMSkills: Habilidades Multimodales para Agentes Visuales Generales",
        "de": "MMSkills: Towards Multimodal Skills for General Visual Agents"
      },
      "summary_i18n": {
        "en": "Multimodal procedural knowledge frameworks enable visual agents to leverage external reusable skills through structured representations combining text, state cards, and visual keyframes, improving decision-making in complex environments.",
        "zh-CN": "构建多模态程序知识框架，通过文本、状态卡和视觉关键帧提升视觉代理决策能力",
        "ja": "マルチモーダルの手続き的知識フレームワークは、構造化された表現を通じて外部の再利用可能なスキルを活用し、複雑な環境での意思決定を改善する。",
        "ko": "다중 모달 프로시저 지식 프레임워크는 복잡한 환경에서 의사결정을 개선하기 위해 텍스트, 상태 카드 및 시각 키프레임을 결합한 구조화된 표현을 통해 외부 재사용 가능한 기술을 활용합니다.",
        "es": "Marcos de conocimiento procedural multimodal permiten a agentes visuales aprovechar habilidades reutilizables externas mediante representaciones estructuradas que combinan texto, tarjetas de estado y fotogramas visuales.",
        "de": "Multimodale Wissensrahmen ermöglichen visuellen Agenten, externe Fähigkeiten durch strukturierte Darstellungen zu nutzen und Entscheidungen zu verbessern."
      }
    },
    {
      "arxivId": "2605.22109",
      "title": "Perception or Prejudice: Can MLLMs Go Beyond First Impressions of Personality?",
      "summary": "Researchers introduce a new task and dataset for evaluating personality reasoning in multimodal language models, revealing significant gaps between accurate predictions and grounded reasoning processes.",
      "authors": [
        "Caixin Kang",
        "Tianyu Yan",
        "Sitong Gong",
        "Mingfang Zhang",
        "Liangyang Ouyang",
        "Ruicong Liu"
      ],
      "organization": {
        "_id": "6a0fbe68162dc5d32a0057a0",
        "name": "utokyo-ai",
        "fullname": "The University of Tokyo",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/63a286c7f30c4642278ed11a/jNLYa73JGG4k3ywpoD_9k.png"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 153,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.22109.png",
      "arxivUrl": "https://arxiv.org/abs/2605.22109",
      "pdfUrl": "https://arxiv.org/pdf/2605.22109.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.22109",
      "githubRepo": "https://github.com/kkkcx/MM-OCEAN",
      "githubStars": 5,
      "keywords": [
        "Multimodal Large Language Models",
        "Big Five score prediction",
        "Grounded Personality Reasoning",
        "MM-OCEAN dataset",
        "chain of rating",
        "reasoning"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "感知还是偏见：MLLM能否超越人格的第一印象？",
      "summary_zh": "提出新任务和数据集评估多模态语言模型的人格推理，揭示准确预测与合理推理间的显著差距。",
      "title_i18n": {
        "en": "Perception or Prejudice: Can MLLMs Go Beyond First Impressions of Personality?",
        "zh-CN": "感知还是偏见：MLLM能否超越人格的第一印象？",
        "ja": "Perception or Prejudice: Can MLLMs Go Beyond First Impressions of Personality?",
        "ko": "Perception or Prejudice: Can MLLMs Go Beyond First Impressions of Personality?",
        "es": "Percepción o prejuicio: ¿Pueden los MLLMs superar las primeras impresiones de personalidad?",
        "de": "Wahrnehmung oder Vorurteil: Können MLLMs über erste Eindrücke der Persönlichkeit hinausgehen?"
      },
      "summary_i18n": {
        "en": "Researchers introduce a new task and dataset for evaluating personality reasoning in multimodal language models, revealing significant gaps between accurate predictions and grounded reasoning processes.",
        "zh-CN": "提出新任务和数据集评估多模态语言模型的人格推理，揭示准确预测与合理推理间的显著差距。",
        "ja": "マルチモーダル言語モデルにおける性格推論の評価を目的とした新しいタスクとデータセットが導入され、正確な予測と根拠のある推論プロセスの間に大きなギャップが明らかにされた。",
        "ko": "연구진은 다중 모달 언어 모델의 성격 추론을 평가하기 위한 새로운 과제와 데이터세트를 제안하며, 정확한 예측과 근거 기반 추론 과정 사이의 상당한 격차를 밝혀냅니다.",
        "es": "Los investigadores introducen una nueva tarea y conjunto de datos para evaluar el razonamiento de la personalidad en modelos de lenguaje multimodales, revelando grandes brechas entre predicciones precisas y procesos de razonamiento fundamentados.",
        "de": "Forscher führen eine neue Aufgabe und einen Datensatz zur Bewertung des Persönlichkeits-Reasonings in multimodalen Sprachmodellen ein, wobei erhebliche Lücken zwischen genauen Vorhersagen und fundierten Schlussfolgerungen offenbar werden."
      }
    },
    {
      "arxivId": "2605.18678",
      "title": "Lance: Unified Multimodal Modeling by Multi-Task Synergy",
      "summary": "Lance is a unified multimodal model that combines understanding, generation, and editing capabilities for images and videos through collaborative multi-task training and a dual-stream architecture.",
      "authors": [
        "Fengyi Fu",
        "Mengqi Huang",
        "Shaojin Wu",
        "Yunsheng Jiang",
        "Yufei Huo",
        "Hao Li"
      ],
      "organization": {
        "_id": "66bc9cc55b64185087601c60",
        "name": "bytedance-research",
        "fullname": "bytedance-research",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/6535c9e88bde2fae19b6fb25/7a1zq0juEwFJVCIShnLI-.png"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 71,
      "comments": 4,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18678.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18678",
      "pdfUrl": "https://arxiv.org/pdf/2605.18678.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18678",
      "githubRepo": "https://github.com/bytedance/Lance",
      "githubStars": 766,
      "keywords": [
        "mixture-of-experts architecture",
        "multimodal understanding",
        "multimodal generation",
        "multimodal editing",
        "collaborative multi-task training",
        "unified context modeling"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Lance：通过多任务协同的统一多模态建模",
      "summary_zh": "Lance通过多任务协同和双流架构，实现图像和视频的理解、生成与编辑的统一建模。",
      "title_i18n": {
        "en": "Lance: Unified Multimodal Modeling by Multi-Task Synergy",
        "zh-CN": "Lance：通过多任务协同的统一多模态建模",
        "ja": "Lance: Unified Multimodal Modeling by Multi-Task Synergy",
        "ko": "Lance: Unified Multimodal Modeling by Multi-Task Synergy",
        "es": "Lance: Modelado multimodal unificado mediante sinergia multitarea",
        "de": "Lance: Einheitliche Multimodalmodellierung durch Multi-Aufgaben-Synergie"
      },
      "summary_i18n": {
        "en": "Lance is a unified multimodal model that combines understanding, generation, and editing capabilities for images and videos through collaborative multi-task training and a dual-stream architecture.",
        "zh-CN": "Lance通过多任务协同和双流架构，实现图像和视频的理解、生成与编辑的统一建模。",
        "ja": "Lanceは、協調的なマルチタスク学習と二重ストリームアーキテクチャを通じて、画像および動画の理解、生成、編集機能を統合したマルチモーダルモデルである。",
        "ko": "Lance는 협력적 다중 작업 학습과 이중 스트림 아키텍처를 통해 이미지와 비디오의 이해, 생성, 편집 기능을 결합한 통합 다중 모달 모델입니다.",
        "es": "Lance es un modelo multimodal unificado que combina capacidades de comprensión, generación y edición para imágenes y videos mediante entrenamiento multitarea colaborativo y una arquitectura de doble flujo.",
        "de": "Lance ist ein einheitliches Multimodalmodell, das Verständnis-, Generierungs- und Bearbeitungsfähigkeiten für Bilder und Videos durch kooperatives Multi-Aufgaben-Training und eine Dual-Stream-Architektur kombiniert."
      }
    },
    {
      "arxivId": "2605.11609",
      "title": "Anti-Self-Distillation for Reasoning RL via Pointwise Mutual Information",
      "summary": "Anti-Self-Distillation reverses the direction of knowledge transfer in self-distillation to improve math reasoning efficiency and accuracy.",
      "authors": [
        "Guobin Shen",
        "Xiang Cheng",
        "Chenxiao Zhao",
        "Lei Huang",
        "Jindong Li",
        "Dongcheng Zhao"
      ],
      "organization": {
        "_id": "68246a0a98117c02df67a547",
        "name": "rednote-hilab",
        "fullname": "rednote-hilab",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/6807a1d6504547b3554b9c73/WgnnQDsz7FqnyTtv8mmRO.png"
      },
      "publishedAt": "2026-05-12T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 187,
      "comments": 4,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.11609.png",
      "arxivUrl": "https://arxiv.org/abs/2605.11609",
      "pdfUrl": "https://arxiv.org/pdf/2605.11609.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.11609",
      "githubRepo": "https://github.com/FloyedShen/AntiSD",
      "githubStars": 11,
      "keywords": [
        "self-distillation",
        "privileged context",
        "pointwise mutual information",
        "entropy-triggered gate",
        "GRPO baseline",
        "language model"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "通过点互信息的反自蒸馏用于推理强化学习",
      "summary_zh": "使用点互信息实现反自蒸馏，提升推理强化学习的效率和准确性。",
      "title_i18n": {
        "en": "Anti-Self-Distillation for Reasoning RL via Pointwise Mutual Information",
        "zh-CN": "通过点互信息的反自蒸馏用于推理强化学习",
        "ja": "Anti-Self-Distillation for Reasoning RL via Pointwise Mutual Information",
        "ko": "Anti-Self-Distillation for Reasoning RL via Pointwise Mutual Information",
        "es": "Anti-Autodestilación para RL de Razonamiento mediante Información Mutua Puntual",
        "de": "Anti-Selbst-Destillation für Reasoning RL via Pointwise Mutual Information"
      },
      "summary_i18n": {
        "en": "Anti-Self-Distillation reverses the direction of knowledge transfer in self-distillation to improve math reasoning efficiency and accuracy.",
        "zh-CN": "使用点互信息实现反自蒸馏，提升推理强化学习的效率和准确性。",
        "ja": "自己蒸留の知識転送方向を逆転させることで、数学的推論の効率と精度が向上する。",
        "ko": "Anti-Self-Distillation은 자기 증류에서 지식 전달의 방향을 뒤집어 수학 추론의 효율성과 정확도를 향상시킨다.",
        "es": "La anti-autodestilación invierte la dirección de la transferencia de conocimiento en la autodestilación para mejorar la eficiencia y precisión del razonamiento matemático.",
        "de": "Anti-Selbst-Destillation kehrt die Richtung des Wissensübertragungsprozesses in der Selbst-Destillation um, um die Effizienz und Genauigkeit bei mathematischem Reasoning zu verbessern."
      }
    },
    {
      "arxivId": "2605.14678",
      "title": "π-Bench: Evaluating Proactive Personal Assistant Agents in Long-Horizon Workflows",
      "summary": "Proactive assistance in personal agent systems requires identifying hidden user intents through sustained multi-turn interactions, which current benchmarks fail to adequately evaluate.",
      "authors": [
        "Haoran Zhang",
        "Luxin Xu",
        "Zhilin Wang",
        "Runquan Gui",
        "Shunkai Zhang",
        "Haodi Lei"
      ],
      "organization": {
        "_id": "6a03a74a34f1cffc0570e62d",
        "name": "Simplified-Reasoning",
        "fullname": "Simplified Reasoning",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/62495cb96ee7ee6b646db130/S-AqbmitJFm2PxXyZrX7H.jpeg"
      },
      "publishedAt": "2026-05-19T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 88,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.14678.png",
      "arxivUrl": "https://arxiv.org/abs/2605.14678",
      "pdfUrl": "https://arxiv.org/pdf/2605.14678.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.14678",
      "githubRepo": "https://github.com/Simplified-Reasoning/Pi-Bench",
      "githubStars": 31,
      "keywords": [
        "personal assistant agents",
        "large language models",
        "proactive assistance",
        "user intents",
        "multi-turn interactions",
        "domain-specific user personas"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "π-Bench：评估长流程中主动个人助手代理",
      "summary_zh": "个人智能体系统中的主动协助需要通过持续的多轮交互识别隐藏的用户意图，而现有基准未能充分评估这一能力。",
      "title_i18n": {
        "en": "π-Bench: Evaluating Proactive Personal Assistant Agents in Long-Horizon Workflows",
        "zh-CN": "π-Bench：评估长流程中主动个人助手代理",
        "ja": "π-Bench: Evaluating Proactive Personal Assistant Agents in Long-Horizon Workflows",
        "ko": "π-Bench: Evaluating Proactive Personal Assistant Agents in Long-Horizon Workflows",
        "es": "π-Bench: Evaluar agentes asistentes personales proactivos en flujos de trabajo a largo plazo",
        "de": "π-Bench: Bewertung proaktiver persönlicher Assistenten in langfristigen Workflows"
      },
      "summary_i18n": {
        "en": "Proactive assistance in personal agent systems requires identifying hidden user intents through sustained multi-turn interactions, which current benchmarks fail to adequately evaluate.",
        "zh-CN": "个人智能体系统中的主动协助需要通过持续的多轮交互识别隐藏的用户意图，而现有基准未能充分评估这一能力。",
        "ja": "長期的なワークフローにおけるパーソナルエージェントの能動的支援には、継続的な対話による隠れたユーザー意図の特定が必要だが、現行ベンチマークでは不十分である。",
        "ko": "개인 에이전트 시스템의 능동적 지원은 지속적인 멀티턴 상호작용을 통해 숨겨진 사용자 의도를 파악해야 하지만, 현재 벤치마크는 이를 충분히 평가하지 못한다.",
        "es": "La asistencia proactiva en sistemas de agentes personales requiere identificar intenciones ocultas del usuario mediante interacciones multi-turno sostenidas, lo cual los benchmarks actuales no evalúan adecuadamente.",
        "de": "Proaktive Unterstützung in persönlichen Agentensystemen erfordert die Identifizierung versteckter Benutzerabsichten durch nachhaltige Mehrrundeninteraktionen, was aktuelle Benchmarks nicht ausreichend bewerten."
      }
    },
    {
      "arxivId": "2605.14747",
      "title": "Video2GUI: Synthesizing Large-Scale Interaction Trajectories for Generalized GUI Agent Pretraining",
      "summary": "A large-scale GUI dataset was created by automatically extracting interaction trajectories from internet videos, enabling improved performance in GUI agents through pre-training on this diverse collection.",
      "authors": [
        "Weimin Xiong",
        "Shuhao Gu",
        "Bowen Ye",
        "Zihao Yue",
        "Lei Li",
        "Feifan Song"
      ],
      "organization": {
        "_id": "61dcd8e344f59573371b5cb6",
        "name": "PekingUniversity",
        "fullname": "Peking University",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/vavgrBsnkSejriUF4lXDE.png"
      },
      "publishedAt": "2026-05-14T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 142,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.14747.png",
      "arxivUrl": "https://arxiv.org/abs/2605.14747",
      "pdfUrl": "https://arxiv.org/pdf/2605.14747.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.14747",
      "githubRepo": "https://github.com/WeiminXiong/Video2GUI",
      "githubStars": 26,
      "keywords": [
        "multimodal large language models",
        "graphical user interface agents",
        "GUI grounding",
        "action benchmarks",
        "pre-training",
        "GUI interaction trajectories"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Video2GUI：合成大规模交互轨迹用于通用GUI代理预训练",
      "summary_zh": "通过从互联网视频中自动提取交互轨迹构建大规模GUI数据集，提升GUI代理预训练性能",
      "title_i18n": {
        "en": "Video2GUI: Synthesizing Large-Scale Interaction Trajectories for Generalized GUI Agent Pretraining",
        "zh-CN": "Video2GUI：合成大规模交互轨迹用于通用GUI代理预训练",
        "ja": "Video2GUI: Synthesizing Large-Scale Interaction Trajectories for Generalized GUI Agent Pretraining",
        "ko": "Video2GUI: Synthesizing Large-Scale Interaction Trajectories for Generalized GUI Agent Pretraining",
        "es": "Video2GUI: Sintetizar trayectorias de interacción a gran escala para preentrenamiento de agentes GUI generalizados",
        "de": "Video2GUI: Synthetisieren von großskaligen Interaktionsverläufen für generalisierte GUI-Agenten-Vortraining"
      },
      "summary_i18n": {
        "en": "A large-scale GUI dataset was created by automatically extracting interaction trajectories from internet videos, enabling improved performance in GUI agents through pre-training on this diverse collection.",
        "zh-CN": "通过从互联网视频中自动提取交互轨迹构建大规模GUI数据集，提升GUI代理预训练性能",
        "ja": "インターネット動画からインタラクション軌跡を自動抽出して大規模GUIデータセットを構築し、この多様なデータでの事前学習によりGUIエージェントの性能を向上させた。",
        "ko": "인터넷 동영상에서 상호작용 궤적을 자동으로 추출해 대규모 GUI 데이터셋을 구축했으며, 이 다양한 데이터로 사전 학습하여 GUI 에이전트의 성능을 향상시켰다.",
        "es": "Se creó un conjunto de datos de GUI a gran escala extrayendo automáticamente trayectorias de interacción de videos de internet, mejorando el rendimiento de agentes GUI mediante preentrenamiento en esta colección diversa.",
        "de": "Ein großer GUI-Datensatz wurde durch automatisches Extrahieren von Interaktionsverläufen aus Internetvideos erstellt; das Vortraining auf dieser vielfältigen Sammlung verbessert die Leistung von GUI-Agenten."
      }
    },
    {
      "arxivId": "2605.15298",
      "title": "PhysBrain 1.0 Technical Report",
      "summary": "PhysBrain 1.0 leverages human egocentric video to generate physical commonsense supervision for vision-language-action models, achieving state-of-the-art performance in embodied control tasks through capability-preserving adaptation.",
      "authors": [
        "Shijie Lian",
        "Bin Yu",
        "Xiaopeng Lin",
        "Changti Wu",
        "Hang Yuan",
        "Xiaolin Hu"
      ],
      "organization": {
        "_id": "6948d884070dda0c2ae35a78",
        "name": "DeepCybo",
        "fullname": "DeepCybo",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/65ec01fd770aa0e25d9374dc/QOsz6P_7AxyqGrjsRHTGk.png"
      },
      "publishedAt": "2026-05-14T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 139,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.15298.png",
      "arxivUrl": "https://arxiv.org/abs/2605.15298",
      "pdfUrl": "https://arxiv.org/pdf/2605.15298.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.15298",
      "githubRepo": "https://github.com/Phys-Brain/PhysBrain-VLA",
      "githubStars": 24,
      "keywords": [
        "vision-language-action models",
        "physical commonsense supervision",
        "multimodal QA benchmarks",
        "embodied control benchmarks",
        "VLA policies",
        "capability-preserving adaptation"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "PhysBrain 1.0 技术报告",
      "summary_zh": "利用人类第一视角视频生成物理常识监督，提升视觉-语言-动作模型在具身控制任务中的性能",
      "title_i18n": {
        "en": "PhysBrain 1.0 Technical Report",
        "zh-CN": "PhysBrain 1.0 技术报告",
        "ja": "PhysBrain 1.0 Technical Report",
        "ko": "PhysBrain 1.0 Technical Report",
        "es": "Informe Técnico de PhysBrain 1.0",
        "de": "PhysBrain 1.0 Technischer Bericht"
      },
      "summary_i18n": {
        "en": "PhysBrain 1.0 leverages human egocentric video to generate physical commonsense supervision for vision-language-action models, achieving state-of-the-art performance in embodied control tasks through capability-preserving adaptation.",
        "zh-CN": "利用人类第一视角视频生成物理常识监督，提升视觉-语言-动作模型在具身控制任务中的性能",
        "ja": "PhysBrain 1.0は、人間の一人称視点動画を活用して視覚・言語・行動モデル向けの物理的常識の教師信号を生成し、能力を保持する適応によって身体性制御タスクで最先端の性能を達成する。",
        "ko": "PhysBrain 1.0은 인간의 1인칭 시점 비디오를 활용해 비전-언어-행동 모델을 위한 물리적 상식 감독 신호를 생성하고, 능력 보존 적응을 통해 체화 제어 과제에서 최첨단 성능을 달성한다.",
        "es": "PhysBrain 1.0 utiliza videos egocéntricos humanos para generar supervisión de sentido común físico para modelos vision-language-action, logrando un rendimiento de vanguardia en tareas de control encarnado mediante adaptación preservadora de capacidades.",
        "de": "PhysBrain 1.0 nutzt menschliche egozentrische Videos, um Supervision für physikalisches Alltagswissen für Vision-Language-Action-Modelle zu erzeugen, und erzielt durch fähigkeitserhaltende Anpassung Spitzenleistungen bei Embodied-Control-Aufgaben."
      }
    },
    {
      "arxivId": "2605.21467",
      "title": "DelTA: Discriminative Token Credit Assignment for Reinforcement Learning from Verifiable Rewards",
      "summary": "Reinforcement learning from verifiable rewards is enhanced through a discriminative token credit assignment method that improves reward-based training by amplifying distinctive token-gradient directions and reducing noise from shared patterns.",
      "authors": [
        "Kaiyi Zhang",
        "Wei Wu",
        "Yankai Lin"
      ],
      "organization": null,
      "publishedAt": "2026-05-20T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 125,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.21467.png",
      "arxivUrl": "https://arxiv.org/abs/2605.21467",
      "pdfUrl": "https://arxiv.org/pdf/2605.21467.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.21467",
      "githubRepo": "https://github.com/RUCBM/DelTA",
      "githubStars": 4,
      "keywords": [
        "reinforcement learning from verifiable rewards",
        "policy-gradient update",
        "token-gradient vectors",
        "advantage-weighted averaging",
        "discriminative token credit assignment",
        "self-normalized RLVR surrogate"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "DelTA：基于可验证奖励的判别性标记信用分配",
      "summary_zh": "提出DelTA方法，通过增强独特标记梯度方向提升可验证奖励的强化学习效果。",
      "title_i18n": {
        "en": "DelTA: Discriminative Token Credit Assignment for Reinforcement Learning from Verifiable Rewards",
        "zh-CN": "DelTA：基于可验证奖励的判别性标记信用分配",
        "ja": "DelTA: Discriminative Token Credit Assignment for Reinforcement Learning from Verifiable Rewards",
        "ko": "DelTA: Discriminative Token Credit Assignment for Reinforcement Learning from Verifiable Rewards",
        "es": "DelTA: Asignación de crédito de tokens discriminativa para aprendizaje por refuerzo a partir de recompensas verificables",
        "de": "DelTA: Diskriminative Token-Kreditzuweisung für Verstärkungslernen aus verifizierbaren Belohnungen"
      },
      "summary_i18n": {
        "en": "Reinforcement learning from verifiable rewards is enhanced through a discriminative token credit assignment method that improves reward-based training by amplifying distinctive token-gradient directions and reducing noise from shared patterns.",
        "zh-CN": "提出DelTA方法，通过增强独特标记梯度方向提升可验证奖励的强化学习效果。",
        "ja": "検証可能な報酬からの強化学習は、特徴的なトークン勾配方向を強化し、共有パターンからのノイズを削減することで改善される。",
        "ko": "검증 가능한 보상 기반 강화학습을, 고유한 토큰 그래디언트 방향은 증폭하고 공통 패턴에서 오는 노이즈는 줄이는 판별적 토큰 크레딧 할당 기법으로 개선한다.",
        "es": "El aprendizaje por refuerzo a partir de recompensas verificables se mejora mediante un método de asignación de crédito de tokens discriminativo que mejora el entrenamiento basado en recompensas amplificando direcciones de gradiente de tokens distintivos y reduciendo el ruido de patrones compartidos.",
        "de": "Verstärkungslernen aus verifizierbaren Belohnungen wird durch eine diskriminative Token-Kreditzuweisungsmethode verbessert, die die Belohnungs-basierte Trainingsverbesserung durch Verstärkung unterschiedlicher Token-Gradientenrichtungen und Reduzierung von Rauschen aus gemeinsamen Mustern ermöglicht."
      }
    },
    {
      "arxivId": "2605.16403",
      "title": "When Vision Speaks for Sound",
      "summary": "Video-capable multimodal large language models exhibit apparent audio understanding driven by visual cues rather than actual audio processing, necessitating intervention-based frameworks for diagnosing and improving audio-visual alignment.",
      "authors": [
        "Xiaofei Wen",
        "Wenjie Jacky Mo",
        "Xingyu Fu",
        "Rui Cai",
        "Tinghui Zhu",
        "Wendi Li"
      ],
      "organization": {
        "_id": "642bbf2d09d8df6721eeddb5",
        "name": "ucdavis",
        "fullname": "UC Davis",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/642bbe935edcc5760cb0daf1/tq05HDRrfpezK0HSMTA_n.jpeg"
      },
      "publishedAt": "2026-05-13T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 95,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.16403.png",
      "arxivUrl": "https://arxiv.org/abs/2605.16403",
      "pdfUrl": "https://arxiv.org/pdf/2605.16403.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.16403",
      "githubRepo": "https://github.com/rakanWen/wvs-code",
      "githubStars": 31,
      "keywords": [
        "video-capable MLLMs",
        "audio-visual Clever Hans effect",
        "counterfactual audio edits",
        "temporal synchronization",
        "sound existence",
        "audio-visual consistency"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "当视觉为声音发声",
      "summary_zh": "支持视频的多模态大语言模型表面上的音频理解实由视觉线索驱动而非真实的音频处理，需要基于干预的框架来诊断并改进音视频对齐。",
      "title_i18n": {
        "en": "When Vision Speaks for Sound",
        "zh-CN": "当视觉为声音发声",
        "ja": "When Vision Speaks for Sound",
        "ko": "When Vision Speaks for Sound",
        "es": "Cuando la visión habla por el sonido",
        "de": "Wenn das Sehen für den Klang spricht"
      },
      "summary_i18n": {
        "en": "Video-capable multimodal large language models exhibit apparent audio understanding driven by visual cues rather than actual audio processing, necessitating intervention-based frameworks for diagnosing and improving audio-visual alignment.",
        "zh-CN": "支持视频的多模态大语言模型表面上的音频理解实由视觉线索驱动而非真实的音频处理，需要基于干预的框架来诊断并改进音视频对齐。",
        "ja": "ビデオ対応マルチモーダル大規模言語モデルは、実際の音声処理ではなく視覚的ヒントによって音声理解が示されるため、音声・映像の整合性を診断・改善する枠組みが必要である。",
        "ko": "비디오를 지원하는 멀티모달 대규모 언어 모델은 실제 오디오 처리가 아닌 시각적 단서에 의존해 겉보기 오디오 이해를 보이므로, 오디오-비주얼 정합을 진단하고 개선하기 위한 개입 기반 프레임워크가 필요하다.",
        "es": "Los modelos grandes de lenguaje multimodal con capacidad de video muestran una comprensión aparente del audio impulsada por señales visuales en lugar de procesamiento real de audio, requiriendo marcos basados en intervención para diagnosticar y mejorar el alineamiento audiovisual.",
        "de": "Multimodale große Sprachmodelle mit Video-Fähigkeiten zeigen scheinbare audioverstehende Fähigkeiten, die durch visuelle Hinweise statt echte Audioverarbeitung motiviert sind, was interventionsbasierte Rahmenbedingungen zur Diagnose und Verbesserung der Audio-Visuelle Ausrichtung erfordert."
      }
    },
    {
      "arxivId": "2605.18661",
      "title": "AI for Auto-Research: Roadmap & User Guide",
      "summary": "AI systems demonstrate varying reliability across research stages, excelling in structured tasks but struggling with novel ideas and scientific judgment, necessitating human oversight for credible outcomes.",
      "authors": [
        "Lingdong Kong",
        "Xian Sun",
        "Wei Chow",
        "Linfeng Li",
        "Kevin Qinghong Lin",
        "Xuan Billy Zhang"
      ],
      "organization": null,
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 61,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18661.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18661",
      "pdfUrl": "https://arxiv.org/pdf/2605.18661.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18661",
      "githubRepo": "https://github.com/worldbench/awesome-ai-auto-research",
      "githubStars": 129,
      "keywords": [
        "AI-assisted research",
        "automated systems",
        "research papers",
        "long-horizon agents",
        "scientific integrity",
        "epistemological phases"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "AI用于自动研究：路线图与用户指南",
      "summary_zh": "AI系统在各研究阶段的可靠性参差不齐：擅长结构化任务，但在新颖想法和科学判断上表现欠佳，需要人类监督以保证结果可信。",
      "title_i18n": {
        "en": "AI for Auto-Research: Roadmap & User Guide",
        "zh-CN": "AI用于自动研究：路线图与用户指南",
        "ja": "AI for Auto-Research: Roadmap & User Guide",
        "ko": "AI for Auto-Research: Roadmap & User Guide",
        "es": "IA para Investigación Automática: Mapa de ruta y guía para usuarios",
        "de": "AI for Auto-Research: Roadmap & User Guide"
      },
      "summary_i18n": {
        "en": "AI systems demonstrate varying reliability across research stages, excelling in structured tasks but struggling with novel ideas and scientific judgment, necessitating human oversight for credible outcomes.",
        "zh-CN": "AI系统在各研究阶段的可靠性参差不齐：擅长结构化任务，但在新颖想法和科学判断上表现欠佳，需要人类监督以保证结果可信。",
        "ja": "AIシステムは研究段階で信頼性が異なる。構造化されたタスクでは優れているが、新しいアイデアや科学的判断には苦手で、信頼できる結果には人間の監視が必要である。",
        "ko": "AI 시스템은 연구 단계에서 신뢰도가 다양하며 구조화된 작업에서는 우수하지만, 새로운 아이디어와 과학적 판단에는 어려움을 겪는다.",
        "es": "Los sistemas de IA muestran confiabilidad variable en las etapas de investigación, destacando en tareas estructuradas pero lidiando con ideas novedosas y juicio científico, requiriendo supervisión humana para resultados creíbles.",
        "de": "Künstliche Intelligenz zeigt unterschiedliche Zuverlässigkeit in Forschungsphasen, ist in strukturierten Aufgaben gut, aber schwach bei neuen Ideen und wissenschaftlicher Urteilsbildung."
      }
    },
    {
      "arxivId": "2605.20266",
      "title": "A Survey of Large Audio Language Models: Generalization, Trustworthiness, and Outlook",
      "summary": "Large Audio Language Models exhibit significant trustworthiness challenges despite performance advances, requiring comprehensive frameworks addressing security vulnerabilities and defensive strategies.",
      "authors": [
        "Kaiwen Luo",
        "Zhenhong Zhou",
        "Leo Wang",
        "Liang Lin",
        "Yang Xiao",
        "Tianyu Shao"
      ],
      "organization": {
        "_id": "6508b28cf36bb51c50faad98",
        "name": "NanyangTechnologicalUniversity",
        "fullname": "Nanyang Technological University",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/630ca0817dacb93b33506ce7/ZPD1fvei0bcIGeDXxeSkn.png"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 52,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.20266.png",
      "arxivUrl": "https://arxiv.org/abs/2605.20266",
      "pdfUrl": "https://arxiv.org/pdf/2605.20266.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.20266",
      "githubRepo": "https://github.com/Kwwwww74/Awesome-Trustworthy-AudioLLMs",
      "githubStars": 198,
      "keywords": [
        "Large Language Models",
        "Multimodal Large Language Models",
        "Large Audio Language Models",
        "end-to-end frameworks",
        "acoustic signals",
        "attack surface"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "大型音频语言模型综述：泛化性、可信性与展望",
      "summary_zh": "综述大型音频语言模型的泛化性和可信性问题，提出全面框架解决安全漏洞和防御策略。",
      "title_i18n": {
        "en": "A Survey of Large Audio Language Models: Generalization, Trustworthiness, and Outlook",
        "zh-CN": "大型音频语言模型综述：泛化性、可信性与展望",
        "ja": "A Survey of Large Audio Language Models: Generalization, Trustworthiness, and Outlook",
        "ko": "A Survey of Large Audio Language Models: Generalization, Trustworthiness, and Outlook",
        "es": "Revisión de Modelos de Lenguaje Audio Grandes: Generalización, Confianza y Perspectiva",
        "de": "A Survey of Large Audio Language Models: Generalization, Trustworthiness, and Outlook"
      },
      "summary_i18n": {
        "en": "Large Audio Language Models exhibit significant trustworthiness challenges despite performance advances, requiring comprehensive frameworks addressing security vulnerabilities and defensive strategies.",
        "zh-CN": "综述大型音频语言模型的泛化性和可信性问题，提出全面框架解决安全漏洞和防御策略。",
        "ja": "大型オーディオ言語モデルは性能向上しているが、信頼性の問題があり、セキュリティの脆弱性と防御戦略を含む包括的な枠組みが必要である。",
        "ko": "대규모 음성 언어 모델은 성능 향상에도 불구하고 신뢰성 문제가 심각하며 보안 취약점과 방어 전략을 해결해야 한다.",
        "es": "Los modelos de lenguaje audio grandes presentan desafíos significativos de confianza a pesar de los avances en rendimiento, requiriendo marcos completos que aborden vulnerabilidades de seguridad y estrategias defensivas.",
        "de": "Große Audio-Sprachmodelle haben Schwierigkeiten mit Vertrauenswürdigkeit, obwohl sie Leistungen verbessern, benötigen Rahmenbedingungen für Sicherheitslücken und Verteidigungsstrategien."
      }
    },
    {
      "arxivId": "2605.21572",
      "title": "PhysX-Omni: Unified Simulation-Ready Physical 3D Generation for Rigid, Deformable, and Articulated Objects",
      "summary": "PhysX-Omni presents a unified framework for generating simulation-ready 3D assets with physical properties across multiple categories using a novel geometry representation and evaluation benchmarks.",
      "authors": [
        "Ziang Cao",
        "Yinghao Liu",
        "Haitian Li",
        "Runmao Yao",
        "Fangzhou Hong",
        "Zhaoxi Chen"
      ],
      "organization": null,
      "publishedAt": "2026-05-20T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 42,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.21572.png",
      "arxivUrl": "https://arxiv.org/abs/2605.21572",
      "pdfUrl": "https://arxiv.org/pdf/2605.21572.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.21572",
      "githubRepo": "https://github.com/physx-omni/PhysX-Omni",
      "githubStars": 62,
      "keywords": [
        "3D generation",
        "Vision-Language Models",
        "geometry representation",
        "PhysXVerse dataset",
        "PhysX-Bench",
        "embodied AI"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "PhysX-Omni：刚体、柔体和关节物体的统一物理3D生成",
      "summary_zh": "提出PhysX-Omni框架，使用新几何表示生成具有物理属性的模拟就绪3D资产，涵盖多种类别。",
      "title_i18n": {
        "en": "PhysX-Omni: Unified Simulation-Ready Physical 3D Generation for Rigid, Deformable, and Articulated Objects",
        "zh-CN": "PhysX-Omni：刚体、柔体和关节物体的统一物理3D生成",
        "ja": "PhysX-Omni: Unified Simulation-Ready Physical 3D Generation for Rigid, Deformable, and Articulated Objects",
        "ko": "PhysX-Omni: Unified Simulation-Ready Physical 3D Generation for Rigid, Deformable, and Articulated Objects",
        "es": "PhysX-Omni: Generación 3D Física lista para simulación unificada para objetos rígidos, deformables y articulados",
        "de": "PhysX-Omni: Unified Simulation-Ready Physical 3D Generation for Rigid, Deformable, and Articulated Objects"
      },
      "summary_i18n": {
        "en": "PhysX-Omni presents a unified framework for generating simulation-ready 3D assets with physical properties across multiple categories using a novel geometry representation and evaluation benchmarks.",
        "zh-CN": "提出PhysX-Omni框架，使用新几何表示生成具有物理属性的模拟就绪3D资产，涵盖多种类别。",
        "ja": "PhysX-Omniは、新しい幾何表現と評価ベンチマークを使用して、剛体、変形、関節物体のシミュレーション対応3D資産を統一的に生成するフレームワークを提示する。",
        "ko": "PhysX-Omni는 새로운 기하 표현과 평가 기준을 사용하여 물리적 속성을 갖춘 통합 3D 자산 생성 프레임워크를 제시한다.",
        "es": "PhysX-Omni presenta un marco unificado para generar activos 3D listos para simulación con propiedades físicas en múltiples categorías usando una representación geométrica innovadora y benchmarks de evaluación.",
        "de": "PhysX-Omni bietet einen einheitlichen Rahmen zur Generierung von physikalischen 3D-Objekten mit Simulationseigenschaften über mehrere Kategorien hinweg."
      }
    },
    {
      "arxivId": "2605.15824",
      "title": "FashionChameleon: Towards Real-Time and Interactive Human-Garment Video Customization",
      "summary": "FashionChameleon enables real-time interactive multi-garment video customization through teacher-student distillation and in-context learning techniques while maintaining motion coherence.",
      "authors": [
        "Quanjian Song",
        "Yefeng Shen",
        "Mengting Chen",
        "Hao Sun",
        "Jinsong Lan",
        "Xiaoyong Zhu"
      ],
      "organization": {
        "_id": "64488b334988ee01f2a8d856",
        "name": "alibaba-inc",
        "fullname": "alibaba-inc",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/61ac8f8a00d01045fca0ad2f/MX4wxQVaFm1A1wqnrL2WU.jpeg"
      },
      "publishedAt": "2026-05-15T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 57,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.15824.png",
      "arxivUrl": "https://arxiv.org/abs/2605.15824",
      "pdfUrl": "https://arxiv.org/pdf/2605.15824.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.15824",
      "githubRepo": "https://github.com/quanjiansong/FashionChameleon",
      "githubStars": 96,
      "keywords": [
        "autoregressive video generation",
        "in-context learning",
        "streaming distillation",
        "gradient-reweighted distribution matching",
        "kv cache rescheduling",
        "garment switching"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "FashionChameleon：实时交互式人体服装视频定制",
      "summary_zh": "通过教师-学生蒸馏和上下文学习实现多服装实时交互视频定制，保持动作连贯性",
      "title_i18n": {
        "en": "FashionChameleon: Towards Real-Time and Interactive Human-Garment Video Customization",
        "zh-CN": "FashionChameleon：实时交互式人体服装视频定制",
        "ja": "FashionChameleon: Towards Real-Time and Interactive Human-Garment Video Customization",
        "ko": "FashionChameleon: Towards Real-Time and Interactive Human-Garment Video Customization",
        "es": "FashionChameleon: Hacia la personalización en video de ropa humana en tiempo real e interactiva",
        "de": "FashionChameleon: Towards Real-Time and Interactive Human-Garment Video Customization"
      },
      "summary_i18n": {
        "en": "FashionChameleon enables real-time interactive multi-garment video customization through teacher-student distillation and in-context learning techniques while maintaining motion coherence.",
        "zh-CN": "通过教师-学生蒸馏和上下文学习实现多服装实时交互视频定制，保持动作连贯性",
        "ja": "FashionChameleonは、教師-生徒の知識蒸留と文脈内学習により、リアルタイムでインタラクティブな複数の服の動画カスタマイズを実現し、動きの整合性を維持する。",
        "ko": "FashionChameleon은 교사-학생 학습 및 맥락 내 학습 기법을 통해 실시간 다중 의류 비디오 맞춤화를 가능하게 한다.",
        "es": "FashionChameleon permite la personalización interactiva de video con múltiples prendas en tiempo real mediante técnicas de destilación maestro-estudiante y aprendizaje en contexto, manteniendo la coherencia de movimiento.",
        "de": "FashionChameleon ermöglicht Echtzeit-Interaktion bei der Anpassung von Kleidung in Videos durch Lehrer-Student-Verfahren und Kontextlernen."
      }
    },
    {
      "arxivId": "2605.20119",
      "title": "Toto 2.0: Time Series Forecasting Enters the Scaling Era",
      "summary": "Time series foundation models demonstrate scalable forecasting performance across parameter sizes, with Toto 2.0 achieving state-of-the-art results on multiple benchmarks through a unified training approach.",
      "authors": [
        "Emaad Khwaja",
        "Chris Lettieri",
        "Gerald Woo",
        "Eden Belouadah",
        "Marc Cenac",
        "Guillaume Jarry"
      ],
      "organization": {
        "_id": "676d60964b96c8ead04106ea",
        "name": "Datadog",
        "fullname": "Datadog",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/64399c0deb7c5616ef401ae5/tIe52AF51aIyKzDtbvH2U.png"
      },
      "publishedAt": "2026-05-19T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 35,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.20119.png",
      "arxivUrl": "https://arxiv.org/abs/2605.20119",
      "pdfUrl": "https://arxiv.org/pdf/2605.20119.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.20119",
      "githubRepo": "https://github.com/DataDog/toto",
      "githubStars": 443,
      "keywords": [
        "time series foundation models",
        "forecasting models",
        "parameter scaling",
        "BOOM benchmark",
        "GIFT-Eval benchmark",
        "TIME benchmark"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Toto 2.0：时间序列预测进入扩展时代",
      "summary_zh": "Toto 2.0通过统一训练方法在多个基准上实现最优预测性能",
      "title_i18n": {
        "en": "Toto 2.0: Time Series Forecasting Enters the Scaling Era",
        "zh-CN": "Toto 2.0：时间序列预测进入扩展时代",
        "ja": "Toto 2.0: Time Series Forecasting Enters the Scaling Era",
        "ko": "Toto 2.0: Time Series Forecasting Enters the Scaling Era",
        "es": "Toto 2.0: La predicción de series temporales entra en la era de escalabilidad",
        "de": "Toto 2.0: Time Series Forecasting Enters the Scaling Era"
      },
      "summary_i18n": {
        "en": "Time series foundation models demonstrate scalable forecasting performance across parameter sizes, with Toto 2.0 achieving state-of-the-art results on multiple benchmarks through a unified training approach.",
        "zh-CN": "Toto 2.0通过统一训练方法在多个基准上实现最优预测性能",
        "ja": "時系列ファウンデーションモデルはパラメータサイズに応じて予測性能がスケーラブルで、Toto 2.0は統一的なトレーニングアプローチにより複数のベンチマークで最先端の結果を達成する。",
        "ko": "시계열 파운데이션 모델은 파라미터 크기에 따라 예측 성능이 확장되며, Toto 2.0은 통합 학습 접근법으로 여러 벤치마크에서 최고 수준의 결과를 달성한다.",
        "es": "Los modelos fundacionales de series temporales demuestran un rendimiento de predicción escalable en distintos tamaños de parámetros, con Toto 2.0 alcanzando resultados de vanguardia en múltiples benchmarks mediante un enfoque de entrenamiento unificado.",
        "de": "Zeitreihen-Grundmodell zeigt skalierbare Vorhersageleistung, Toto 2.0 erreicht Spitzenresultate durch einheitliches Training."
      }
    },
    {
      "arxivId": "2605.17757",
      "title": "OSCAR: Offline Spectral Covariance-Aware Rotation for 2-bit KV Cache Quantization",
      "summary": "OSCAR is an ultra-low-bit KV cache quantization method that aligns quantization with attention-aware covariance structures, achieving high accuracy and efficiency for long-context LLM serving.",
      "authors": [
        "Zhongzhu Zhou",
        "Donglin Zhuang",
        "Jisen Li",
        "Ziyan Chen",
        "Shuaiwen Leon Song",
        "Ben Athiwaratkun"
      ],
      "organization": {
        "_id": "632b803bb2dd35f135623cc2",
        "name": "togethercomputer",
        "fullname": "Together",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/67be52be391885e452f4f1be/XiCaXuCIs-N_03X_nzopf.jpeg"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 59,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.17757.png",
      "arxivUrl": "https://arxiv.org/abs/2605.17757",
      "pdfUrl": "https://arxiv.org/pdf/2605.17757.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.17757",
      "githubRepo": "https://github.com/FutureMLS-Lab/OSCAR",
      "githubStars": 48,
      "keywords": [
        "KV-cache quantization",
        "attention-aware covariance structures",
        "fixed rotations",
        "clipping thresholds",
        "INT2",
        "attention kernel"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "OSCAR：基于谱协方差感知的2比特KV缓存量化方法",
      "summary_zh": "OSCAR通过关注注意力协方差结构实现2比特KV缓存量化，提升长上下文大模型服务的精度和效率。",
      "title_i18n": {
        "en": "OSCAR: Offline Spectral Covariance-Aware Rotation for 2-bit KV Cache Quantization",
        "zh-CN": "OSCAR：基于谱协方差感知的2比特KV缓存量化方法",
        "ja": "OSCAR: Offline Spectral Covariance-Aware Rotation for 2-bit KV Cache Quantization",
        "ko": "OSCAR: Offline Spectral Covariance-Aware Rotation for 2-bit KV Cache Quantization",
        "es": "OSCAR: Rotación consciente de covarianza espectral para cuantificación de caché KV de 2 bits",
        "de": "OSCAR: Offline Spectral Covariance-Aware Rotation for 2-bit KV Cache Quantization"
      },
      "summary_i18n": {
        "en": "OSCAR is an ultra-low-bit KV cache quantization method that aligns quantization with attention-aware covariance structures, achieving high accuracy and efficiency for long-context LLM serving.",
        "zh-CN": "OSCAR通过关注注意力协方差结构实现2比特KV缓存量化，提升长上下文大模型服务的精度和效率。",
        "ja": "OSCARは、アテンションを考慮した共分散構造に量子化を整合させる超低ビットKVキャッシュ量子化手法であり、長文脈LLMのサービングにおいて高い精度と効率を実現する。",
        "ko": "OSCAR은 주의 중심 공분산 구조에 맞춰 양자화를 조정하는 초저비트 KV 캐시 양자화 방법으로 높은 정확도와 효율성을 제공한다.",
        "es": "OSCAR es un método de cuantificación de caché KV de ultra-bajo bit que alinea la cuantificación con estructuras de covarianza conscientes de atención, logrando alta precisión y eficiencia para servir modelos de lenguaje de gran tamaño.",
        "de": "OSCAR ist eine ultra-niedrige Bit-KV-Cache-Quantisierungsmethode, die Quantisierung mit Aufmerksamkeitsstruktur verbindet, um Genauigkeit und Effizienz zu erhöhen."
      }
    },
    {
      "arxivId": "2605.14236",
      "title": "Active Learners as Efficient PRP Rerankers",
      "summary": "Pairwise ranking prompting is reformulated as active learning from noisy comparisons, with improved rankers that enhance ranking quality under call constraints and address position bias through a randomized oracle.",
      "authors": [
        "Jeremías Figueiredo Paschmann",
        "Juan Kaplan",
        "Francisco Nattero",
        "Santiago Barron",
        "Juan Wisznia",
        "Luciano del Corro"
      ],
      "organization": {
        "_id": "66d5cb037300d333daebedd9",
        "name": "UdeSA",
        "fullname": "Universidad de San Andrés",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/6594888e754092f6b1443bbd/C-ZrIbZoTgAj9p-u0v5jZ.png"
      },
      "publishedAt": "2026-05-15T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 93,
      "comments": 4,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.14236.png",
      "arxivUrl": "https://arxiv.org/abs/2605.14236",
      "pdfUrl": "https://arxiv.org/pdf/2605.14236.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.14236",
      "githubRepo": "https://github.com/jerecoder/IReranker",
      "githubStars": 6,
      "keywords": [
        "pairwise ranking prompting",
        "active learning",
        "noisy pairwise comparisons",
        "ranking algorithms",
        "NDCG@10",
        "call budget"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "主动学习作为高效PRP重排序器",
      "summary_zh": "通过噪声比较将成对排名提示转化为主动学习，提升受限调用下的排序质量并随机化Oracle解决位置偏差",
      "title_i18n": {
        "en": "Active Learners as Efficient PRP Rerankers",
        "zh-CN": "主动学习作为高效PRP重排序器",
        "ja": "Active Learners as Efficient PRP Rerankers",
        "ko": "Active Learners as Efficient PRP Rerankers",
        "es": "Aprendices Activos como Rerankers Eficientes para PRP",
        "de": "Active Learners as Efficient PRP Rerankers"
      },
      "summary_i18n": {
        "en": "Pairwise ranking prompting is reformulated as active learning from noisy comparisons, with improved rankers that enhance ranking quality under call constraints and address position bias through a randomized oracle.",
        "zh-CN": "通过噪声比较将成对排名提示转化为主动学习，提升受限调用下的排序质量并随机化Oracle解决位置偏差",
        "ja": "ペアワイズランキングプロンプトはノイジーな比較からアクティブ学習として再定義され、ランク品質を向上させ、位置バイアスに対処するランダムオラクルを用いる。",
        "ko": "쌍별 순위 프롬프팅은 노이즈가 있는 비교로부터의 능동 학습으로 재구성되며, 개선된 랭커가 호출 제약하에서 순위 품질을 높이고 무작위 오라클을 통해 위치 편향을 해결한다.",
        "es": "El prompting de clasificación por pares se reformula como aprendizaje activo a partir de comparaciones ruidosas, con rankers mejorados que mejoran la calidad de clasificación bajo restricciones de llamada y abordan el sesgo de posición mediante un oráculo aleatorizado.",
        "de": "Paarweises Ranking wird als aktives Lernen aus verrauschten Vergleichen neu definiert, mit besseren Rankern unter Call-Beschränkungen."
      }
    },
    {
      "arxivId": "2605.16257",
      "title": "DexJoCo: A Benchmark and Toolkit for Task-Oriented Dexterous Manipulation on MuJoCo",
      "summary": "DexJoCo presents a benchmark and toolkit for dexterous manipulation with 11 functional tasks evaluating tool-use, bimanual coordination, and long-horizon execution, along with a low-cost data collection system and comprehensive model evaluation.",
      "authors": [
        "Hanwen Wang",
        "Weizhi Zhao",
        "Xiangyu Wang",
        "Siyuan Huang",
        "He Lin",
        "Boyuan Zheng"
      ],
      "organization": null,
      "publishedAt": "2026-05-15T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 49,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.16257.png",
      "arxivUrl": "https://arxiv.org/abs/2605.16257",
      "pdfUrl": "https://arxiv.org/pdf/2605.16257.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.16257",
      "githubRepo": "https://github.com/brave-eai/dexjoco",
      "githubStars": 66,
      "keywords": [
        "dexterous manipulation",
        "benchmark",
        "toolkit",
        "functionally grounded tasks",
        "tool-use",
        "bimanual coordination"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "DexJoCo：MuJoCo上的任务导向灵巧操作基准与工具包",
      "summary_zh": "构建DexJoCo基准与工具包，包含11个功能任务评估工具使用、双臂协调和长时序执行。",
      "title_i18n": {
        "en": "DexJoCo: A Benchmark and Toolkit for Task-Oriented Dexterous Manipulation on MuJoCo",
        "zh-CN": "DexJoCo：MuJoCo上的任务导向灵巧操作基准与工具包",
        "ja": "DexJoCo: A Benchmark and Toolkit for Task-Oriented Dexterous Manipulation on MuJoCo",
        "ko": "DexJoCo: A Benchmark and Toolkit for Task-Oriented Dexterous Manipulation on MuJoCo",
        "es": "DexJoCo: Un benchmark y herramienta para manipulación diestra orientada a tareas en MuJoCo",
        "de": "DexJoCo: A Benchmark and Toolkit for Task-Oriented Dexterous Manipulation on MuJoCo"
      },
      "summary_i18n": {
        "en": "DexJoCo presents a benchmark and toolkit for dexterous manipulation with 11 functional tasks evaluating tool-use, bimanual coordination, and long-horizon execution, along with a low-cost data collection system and comprehensive model evaluation.",
        "zh-CN": "构建DexJoCo基准与工具包，包含11个功能任务评估工具使用、双臂协调和长时序执行。",
        "ja": "DexJoCoは、ツール使用、両手協調、長期的な実行を評価する11の機能的タスクを備えた、器用なマニピュレーションのためのベンチマークとツールキットを提示する。",
        "ko": "DexJoCo는 도구 사용, 양손 협응, 장기 실행을 평가하는 11개의 기능적 작업을 포함한 벤치마크와 툴킷을 제공한다.",
        "es": "DexJoCo presenta un benchmark y herramienta para manipulación diestra con 11 tareas funcionales que evalúan el uso de herramientas, coordinación bimanual y ejecución a largo plazo, junto con un sistema de recolección de datos de bajo costo y evaluación completa de modelos.",
        "de": "DexJoCo präsentiert Benchmark und Werkzeugkasten für geschickte Manipulation mit 11 Funktionstasks und kostengünstiger Datenbeschaffung."
      }
    },
    {
      "arxivId": "2605.14271",
      "title": "Auditing Agent Harness Safety",
      "summary": "LLM agents executing within execution harnesses can produce correct outputs while violating safety constraints during execution, necessitating trajectory-level auditing to ensure proper resource access and information flow across multi-agent systems.",
      "authors": [
        "Chengzhi Liu",
        "Yichen Guo",
        "Yepeng Liu",
        "Yuzhe Yang",
        "Qianqi Yan",
        "Xuandong Zhao"
      ],
      "organization": {
        "_id": "65861edfe3f7a2dcf04230f8",
        "name": "ucsbnlp",
        "fullname": "UC Santa Barbara NLP Group",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/6002c1db698168af3bb9f4a5/WQYUIGXIycUiVr_J5X2n0.jpeg"
      },
      "publishedAt": "2026-05-14T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 54,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.14271.png",
      "arxivUrl": "https://arxiv.org/abs/2605.14271",
      "pdfUrl": "https://arxiv.org/pdf/2605.14271.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.14271",
      "githubRepo": "https://github.com/eric-ai-lab/HarnessAudit",
      "githubStars": 37,
      "keywords": [
        "execution harnesses",
        "tool dispatching",
        "resource allocation",
        "multi-agent systems",
        "safety benchmarks",
        "trajectory auditing"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "审计代理执行安全",
      "summary_zh": "通过轨迹级审计确保多代理系统中资源访问与信息流的安全性",
      "title_i18n": {
        "en": "Auditing Agent Harness Safety",
        "zh-CN": "审计代理执行安全",
        "ja": "Auditing Agent Harness Safety",
        "ko": "Auditing Agent Harness Safety",
        "es": "Auditing Agent Harness Safety",
        "de": "Auditing Agent Harness Safety"
      },
      "summary_i18n": {
        "en": "LLM agents executing within execution harnesses can produce correct outputs while violating safety constraints during execution, necessitating trajectory-level auditing to ensure proper resource access and information flow across multi-agent systems.",
        "zh-CN": "通过轨迹级审计确保多代理系统中资源访问与信息流的安全性",
        "ja": "実行ハーネス内で動作するLLMエージェントは、実行中に安全制約に違反しながら正しい出力を生成する可能性があり、マルチエージェントシステム全体の適切なリソースアクセスと情報フローを確保するためにトラジェクトリーレベルの監査が必要である。",
        "ko": "LLM 에이전트는 실행 하네스에서 올바른 출력을 생성하지만, 실행 중 안전 제약을 위반할 수 있어, 다중 에이전트 시스템에서 자원 접근 및 정보 흐름을 보장하기 위해 궤적 수준의 감사를 필요로 한다.",
        "es": "Los agentes LLM pueden producir salidas correctas mientras violan restricciones de seguridad, requiriendo auditoría a nivel de trayectoria para garantizar acceso a recursos y flujo de información.",
        "de": "LLM-Agenten, die innerhalb von Ausführungs-Harnesses laufen, können korrekte Ergebnisse liefern, während sie während der Ausführung Sicherheitsbeschränkungen verletzen, was eine Überprüfung auf Trajektorienebene erfordert, um einen ordnungsgemäßen Ressourcenzugriff und Informationsfluss in Multi-Agenten-Systemen sicherzustellen."
      }
    },
    {
      "arxivId": "2605.19577",
      "title": "GoLongRL: Capability-Oriented Long Context Reinforcement Learning with Multitask Alignment",
      "summary": "GoLongRL presents an open-source approach for long-context reinforcement learning with diverse reward optimization through capability-oriented data construction and TMN-Reweight methodology.",
      "authors": [
        "Minxuan Lv",
        "Tiehua Mei",
        "Tanlong Du",
        "Junmin Chen",
        "Zhenpeng Su",
        "Ziyang Chen"
      ],
      "organization": null,
      "publishedAt": "2026-05-19T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 55,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.19577.png",
      "arxivUrl": "https://arxiv.org/abs/2605.19577",
      "pdfUrl": "https://arxiv.org/pdf/2605.19577.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.19577",
      "githubRepo": "https://github.com/xiaoxuanNLP/GoLongRL",
      "githubStars": 34,
      "keywords": [
        "reinforcement learning",
        "long-context",
        "verifiable rewards",
        "post-training recipe",
        "RLVR",
        "GRPO"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "GoLongRL：基于多任务对齐的长上下文强化学习",
      "summary_zh": "提出GoLongRL方法，通过能力导向数据构建和TMN重加权实现长上下文强化学习的多样化奖励优化",
      "title_i18n": {
        "en": "GoLongRL: Capability-Oriented Long Context Reinforcement Learning with Multitask Alignment",
        "zh-CN": "GoLongRL：基于多任务对齐的长上下文强化学习",
        "ja": "GoLongRL: Capability-Oriented Long Context Reinforcement Learning with Multitask Alignment",
        "ko": "GoLongRL: Capability-Oriented Long Context Reinforcement Learning with Multitask Alignment",
        "es": "GoLongRL: Aprendizaje por refuerzo de contexto largo orientado a capacidades con alineación multitarea",
        "de": "GoLongRL: Capability-Oriented Long Context Reinforcement Learning with Multitask Alignment"
      },
      "summary_i18n": {
        "en": "GoLongRL presents an open-source approach for long-context reinforcement learning with diverse reward optimization through capability-oriented data construction and TMN-Reweight methodology.",
        "zh-CN": "提出GoLongRL方法，通过能力导向数据构建和TMN重加权实现长上下文强化学习的多样化奖励优化",
        "ja": "GoLongRLは、能力指向のデータ構築とTMN-Reweight手法を通じて、多様な報酬最適化を実現するオープンソースの長文脈強化学習アプローチを提示する。",
        "ko": "GoLongRL은 능력 중심의 데이터 구성과 TMN-Reweight 방법론을 통해 다양한 보상 최적화를 위한 오픈소스 장기 맥락 강화 학습 접근법을 제시한다.",
        "es": "GoLongRL presenta un enfoque open-source para el aprendizaje de refuerzo con contexto largo y optimización de recompensas diversas mediante construcción de datos orientados a capacidades y metodología TMN-Reweight.",
        "de": "GoLongRL präsentiert einen Open-Source-Ansatz für Long-Context-Verstärkungslernen mit vielfältiger Belohnungsoptimierung durch fähigkeitsorientierte Datenkonstruktion und die TMN-Reweight-Methode."
      }
    },
    {
      "arxivId": "2605.16928",
      "title": "Full Attention Strikes Back: Transferring Full Attention into Sparse within Hundred Training Steps",
      "summary": "RTPurbo leverages intrinsic sparsity in full-attention LLMs to achieve efficient long-context inference with minimal training overhead, enabling significant speedups while maintaining near-lossless accuracy.",
      "authors": [
        "Yanke Zhou",
        "Yiduo Li",
        "Hanlin Tang",
        "Maohua Li",
        "Kan Liu",
        "Lan Tao"
      ],
      "organization": {
        "_id": "6948e7d0a2a90d1cca14cbbc",
        "name": "RTP-LLM",
        "fullname": "RTP-LLM",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/6426d1afbc4f1d51f5479914/lgUmPC4DXPxlhRBDnHybm.webp"
      },
      "publishedAt": "2026-05-16T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 81,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.16928.png",
      "arxivUrl": "https://arxiv.org/abs/2605.16928",
      "pdfUrl": "https://arxiv.org/pdf/2605.16928.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.16928",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "full attention",
        "long-context inference",
        "attention heads",
        "sparse attention",
        "KV cache",
        "token indexer"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "全注意力回归：在百步训练内迁移全注意力",
      "summary_zh": "通过利用全注意力LLM的内在稀疏性，实现高效长上下文推理，训练开销小且近似无损失精度",
      "title_i18n": {
        "en": "Full Attention Strikes Back: Transferring Full Attention into Sparse within Hundred Training Steps",
        "zh-CN": "全注意力回归：在百步训练内迁移全注意力",
        "ja": "Full Attention Strikes Back: Transferring Full Attention into Sparse within Hundred Training Steps",
        "ko": "Full Attention Strikes Back: Transferring Full Attention into Sparse within Hundred Training Steps",
        "es": "Full Attention Strikes Back: Transferir Full Attention en Pocas Etapas de Entrenamiento",
        "de": "Full Attention Strikes Back: Transferring Full Attention into Sparse within Hundred Training Steps"
      },
      "summary_i18n": {
        "en": "RTPurbo leverages intrinsic sparsity in full-attention LLMs to achieve efficient long-context inference with minimal training overhead, enabling significant speedups while maintaining near-lossless accuracy.",
        "zh-CN": "通过利用全注意力LLM的内在稀疏性，实现高效长上下文推理，训练开销小且近似无损失精度",
        "ja": "RTPurboは、フルアテンションLLMの内在的なスパーシティを利用して、最小限のトレーニングオーバーヘッドで効率的な長文脈推論を実現し、ほぼ損失のない精度を維持する。",
        "ko": "RTPurbo는 풀 어텐션 LLM의 내재적 희소성을 활용하여 최소한의 학습 오버헤드로 효율적인 긴 컨텍스트 추론을 달성하며, 거의 손실 없는 정확도를 유지하면서 상당한 속도 향상을 가능하게 한다.",
        "es": "RTPurbo aprovecha la esparsidad intrínseca de los LLMs con atención completa para una inferencia eficiente con poco costo de entrenamiento, logrando aceleraciones significativas con precisión casi sin pérdida.",
        "de": "RTPurbo nutzt die inhärente Sparsität von Full-Attention-LLMs, um effiziente Long-Context-Inferenz mit minimalem Trainingsaufwand zu erreichen, wodurch sich erhebliche Geschwindigkeitsverbesserungen bei nahezu verlustfreier Genauigkeit erzielen lassen."
      }
    },
    {
      "arxivId": "2605.18703",
      "title": "EnvFactory: Scaling Tool-Use Agents via Executable Environments Synthesis and Robust RL",
      "summary": "EnvFactory automates the creation of executable tool environments and natural multi-turn trajectories for training LLMs with agentic reinforcement learning, achieving superior performance with fewer resources.",
      "authors": [
        "Minrui Xu",
        "Zilin Wang",
        "Mengyi DENG",
        "Zhiwei Li",
        "Zhicheng Yang",
        "Xiao Zhu"
      ],
      "organization": {
        "_id": "6980a3aede8ee5f0a7de0007",
        "name": "LARK-Lab",
        "fullname": "LARK Lab@HKUST (GZ)",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/63b6af3accebeadccc868efd/H6b3XExLG87O3ZFPV7Pr5.png"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 48,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18703.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18703",
      "pdfUrl": "https://arxiv.org/pdf/2605.18703.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18703",
      "githubRepo": "https://github.com/LARK-AI-Lab/EnvFactory",
      "githubStars": 50,
      "keywords": [
        "Agentic Reinforcement Learning",
        "tool-use capabilities",
        "execution environments",
        "synthetic trajectories",
        "topology-aware sampling",
        "calibrated refinement"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "EnvFactory：通过可执行环境合成与鲁棒强化学习扩展工具使用代理",
      "summary_zh": "EnvFactory自动化生成可执行环境和多轮轨迹，提升LLMs在代理强化学习中的性能，减少资源消耗。",
      "title_i18n": {
        "en": "EnvFactory: Scaling Tool-Use Agents via Executable Environments Synthesis and Robust RL",
        "zh-CN": "EnvFactory：通过可执行环境合成与鲁棒强化学习扩展工具使用代理",
        "ja": "EnvFactory: Scaling Tool-Use Agents via Executable Environments Synthesis and Robust RL",
        "ko": "EnvFactory: Scaling Tool-Use Agents via Executable Environments Synthesis and Robust RL",
        "es": "EnvFactory: Escalando Agentes con Uso de Herramientas mediante Síntesis de Entornos Ejecutables y RL Robusto",
        "de": "EnvFactory: Scaling Tool-Use Agents via Executable Environments Synthesis and Robust RL"
      },
      "summary_i18n": {
        "en": "EnvFactory automates the creation of executable tool environments and natural multi-turn trajectories for training LLMs with agentic reinforcement learning, achieving superior performance with fewer resources.",
        "zh-CN": "EnvFactory自动化生成可执行环境和多轮轨迹，提升LLMs在代理强化学习中的性能，减少资源消耗。",
        "ja": "EnvFactoryは、LLMsをエージェント強化学習で訓練するための実行可能なツール環境と自然な複数ターントレースを自動生成し、少ないリソースで優れた性能を達成する。",
        "ko": "EnvFactory는 에이전트 강화 학습으로 LLM을 학습시키기 위한 실행 가능한 도구 환경과 자연스러운 다중 턴 궤적을 자동으로 생성하여, 더 적은 자원으로 우수한 성능을 달성한다.",
        "es": "EnvFactory automatiza la creación de entornos ejecutables y trayectorias multi-turno para entrenar LLMs con aprendizaje de refuerzo agente, logrando mejor rendimiento con menos recursos.",
        "de": "EnvFactory automatisiert die Erstellung ausführbarer Werkzeug-Umgebungen und natürlicher Mehrphasen-Trajektorien zur Schulung von LLMs mit agentenbasiertem Verstärkungslernen und erzielt bessere Leistungen mit weniger Ressourcen."
      }
    },
    {
      "arxivId": "2605.11739",
      "title": "Learning to Foresee: Unveiling the Unlocking Efficiency of On-Policy Distillation",
      "summary": "On-policy distillation efficiency arises from early establishment of stable update trajectories, with findings leading to a plug-and-play acceleration method achieving 3x training speedup.",
      "authors": [
        "Yuchen Cai",
        "Ding Cao",
        "Liang Lin",
        "Chunxi Luo",
        "Xin Xu",
        "Kai Yang"
      ],
      "organization": {
        "_id": "6645f953c39288df638dbdd5",
        "name": "Tencent-Hunyuan",
        "fullname": "Tencent Hunyuan",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/62d22496c58f969c152bcefd/woKSjt2wXvBNKussyYPsa.png"
      },
      "publishedAt": "2026-05-13T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 55,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.11739.png",
      "arxivUrl": "https://arxiv.org/abs/2605.11739",
      "pdfUrl": "https://arxiv.org/pdf/2605.11739.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.11739",
      "githubRepo": "https://github.com/caiyuchen-ustc/EffOPD",
      "githubStars": 28,
      "keywords": [
        "on-policy distillation",
        "post-training paradigm",
        "parameter-level mechanisms",
        "module-allocation level",
        "update-direction level",
        "low-rank concentration"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "学习预见：揭示在策略蒸馏中的效率提升机制",
      "summary_zh": "通过揭示稳定更新轨迹的早期建立，提出一种3倍训练加速的即插即用方法",
      "title_i18n": {
        "en": "Learning to Foresee: Unveiling the Unlocking Efficiency of On-Policy Distillation",
        "zh-CN": "学习预见：揭示在策略蒸馏中的效率提升机制",
        "ja": "Learning to Foresee: Unveiling the Unlocking Efficiency of On-Policy Distillation",
        "ko": "Learning to Foresee: Unveiling the Unlocking Efficiency of On-Policy Distillation",
        "es": "Aprender a Prever: Revelando la Eficiencia de la Destilación On-Policy",
        "de": "Learning to Foresee: Unveiling the Unlocking Efficiency of On-Policy Distillation"
      },
      "summary_i18n": {
        "en": "On-policy distillation efficiency arises from early establishment of stable update trajectories, with findings leading to a plug-and-play acceleration method achieving 3x training speedup.",
        "zh-CN": "通过揭示稳定更新轨迹的早期建立，提出一种3倍训练加速的即插即用方法",
        "ja": "オンポリシー蒸留の効率は、安定した更新軌跡の早期確立に起因し、この知見から3倍のトレーニング高速化を実現するプラグアンドプレイの加速手法が得られる。",
        "ko": "온-폴리시 증류의 효율성은 안정적인 업데이트 궤적이 초기에 확립되는 데서 비롯되며, 이러한 발견을 바탕으로 3배의 학습 속도 향상을 달성하는 플러그 앤 플레이 가속 방법을 제시한다.",
        "es": "La eficiencia de la destilación on-policy surge del establecimiento temprano de trayectorias de actualización estables, y estos hallazgos conducen a un método de aceleración plug-and-play que logra una aceleración de 3x en el entrenamiento.",
        "de": "Die Effizienz der On-Policy-Destillation entsteht durch die frühe Stabilisierung von Update-Trajektorien, wobei die Ergebnisse zu einer Plug-and-Play-Beschleunigungsmethode führen, die die Trainingsgeschwindigkeit um das Dreifache erhöht."
      }
    },
    {
      "arxivId": "2605.18451",
      "title": "Code-as-Room: Generating 3D Rooms from Top-Down View Images via Agentic Code Synthesis",
      "summary": "A novel MLLM-based agentic framework called Code-as-Room generates 3D indoor rooms by converting top-down images into executable Blender code through a structured execution harness with cross-stage memory to maintain context.",
      "authors": [
        "Yixuan Yang",
        "Zhen Luo",
        "Wanshui Gan",
        "Jinkun Hao",
        "Junru Lu",
        "Jinghao Yan"
      ],
      "organization": {
        "_id": "6747ee5decec679eafb90450",
        "name": "ShanghaiAiLab",
        "fullname": "shanghai ailab ",
        "avatar": "https://www.gravatar.com/avatar/6cd2acf412ad103653d9ce14a1aacc19?d=retro&size=100"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 40,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18451.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18451",
      "pdfUrl": "https://arxiv.org/pdf/2605.18451.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18451",
      "githubRepo": "https://github.com/YxuanAr/Code-as-Room",
      "githubStars": 82,
      "keywords": [
        "MLLM-based agentic framework",
        "structured execution harness",
        "Blender codes",
        "cross-stage memory",
        "3D room synthesis",
        "top-down views"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Code-as-Room：通过代理代码生成从俯视图生成3D房间",
      "summary_zh": "提出Code-as-Room框架，通过结构化执行引擎将俯视图转为可执行Blender代码生成3D室内场景。",
      "title_i18n": {
        "en": "Code-as-Room: Generating 3D Rooms from Top-Down View Images via Agentic Code Synthesis",
        "zh-CN": "Code-as-Room：通过代理代码生成从俯视图生成3D房间",
        "ja": "Code-as-Room: Generating 3D Rooms from Top-Down View Images via Agentic Code Synthesis",
        "ko": "Code-as-Room: Generating 3D Rooms from Top-Down View Images via Agentic Code Synthesis",
        "es": "Code-as-Room: Generando Habitaciones 3D desde Imágenes de Vista Superior mediante Síntesis de Código Agente",
        "de": "Code-as-Room: Generating 3D Rooms from Top-Down View Images via Agentic Code Synthesis"
      },
      "summary_i18n": {
        "en": "A novel MLLM-based agentic framework called Code-as-Room generates 3D indoor rooms by converting top-down images into executable Blender code through a structured execution harness with cross-stage memory to maintain context.",
        "zh-CN": "提出Code-as-Room框架，通过结构化执行引擎将俯视图转为可执行Blender代码生成3D室内场景。",
        "ja": "Code-as-Roomという新しいMLLMベースのエージェンティックフレームワークは、トップダウン画像を実行可能なBlenderコードに変換して3D室内空間を生成する。",
        "ko": "Code-as-Room이라는 새로운 MLLM 기반 에이전트 프레임워크는 구조화된 실행 하네스를 통해 탑다운 뷰 이미지를 실행 가능한 Blender 코드로 변환하여 3D 실내 공간을 생성한다.",
        "es": "Un marco agente basado en MLLM llamado Code-as-Room genera habitaciones 3D interiores convirtiendo imágenes de vista superior en código ejecutable de Blender mediante un harness estructurado con memoria cruzada.",
        "de": "Ein neues MLLM-basiertes agentisches Framework namens Code-as-Room generiert 3D-Innenräume, indem Top-Down-Bilder in ausführbaren Blender-Code umgewandelt werden, unterstützt durch einen strukturierten Ausführungs-Harness mit stufenübergreifendem Speicher, um den Kontext zu erhalten."
      }
    },
    {
      "arxivId": "2605.22817v1",
      "title": "Vector Policy Optimization: Training for Diversity Improves Test-Time Search",
      "summary": "Language models must now generalize out of the box to novel environments and work inside inference-scaling search procedures, such as AlphaEvolve, that select rollouts with a variety of task-specific reward functions. Unfortunately, the standard paradigm of LLM post-training optimizes a pre-specified scalar reward, often leading current LLMs to produce low-entropy response distributions and thus to struggle at displaying the diversity that inference-time search will require. We propose Vector Policy Optimization (VPO), an RL algorithm that explicitly trains policies to anticipate diverse downstream reward functions and to produce diverse solutions. VPO exploits that rewards are often vector-valued in practice, like per-test-case correctness in code generation or, say, multiple different user personas or reward models. VPO is essentially a drop-in replacement for the GRPO advantage estimator, but it trains the LLM to output a set of solutions where individual solutions specialize to different trade-offs in the vector reward space. Across four tasks, VPO matches or beats the strongest scalar RL baselines on test-time search (e.g. pass@k and best@k), with the gap widening as the search budget grows. For evolutionary search, VPO models unlock problems that GRPO models cannot solve at all. As test-time search becomes more standardized, optimizing for diversity may need to become the default post-training objective.",
      "authors": [
        "Ryan Bahlous-Boldi",
        "Isha Puri",
        "Idan Shenfeld",
        "Akarsh Kumar",
        "Mehul Damani",
        "Sebastian Risi"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:59:26Z",
      "submittedAt": "2026-05-21T17:59:26Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22817v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22817v1",
      "hfUrl": "https://arxiv.org/abs/2605.22817v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.LG",
        "cs.AI",
        "cs.CL",
        "cs.NE"
      ],
      "source": "arXiv cs.CL",
      "sourceType": "arxiv",
      "title_zh": "向量策略优化：训练多样性提升测试时搜索",
      "summary_zh": "提出VPO算法，通过向量奖励训练模型生成多样化解，提升测试时搜索性能，优于传统标量RL方法。",
      "title_i18n": {
        "en": "Vector Policy Optimization: Training for Diversity Improves Test-Time Search",
        "zh-CN": "向量策略优化：训练多样性提升测试时搜索",
        "ja": "Vector Policy Optimization: Training for Diversity Improves Test-Time Search",
        "ko": "Vector Policy Optimization: Training for Diversity Improves Test-Time Search",
        "es": "Vector Policy Optimization: Entrenamiento para Diversidad Mejora la Búsqueda en Tiempo de Prueba",
        "de": "Vector Policy Optimization: Training for Diversity Improves Test-Time Search"
      },
      "summary_i18n": {
        "en": "Language models must now generalize out of the box to novel environments and work inside inference-scaling search procedures, such as AlphaEvolve, that select rollouts with a variety of task-specific reward functions. Unfortunately, the standard paradigm of LLM post-training optimizes a pre-specified scalar reward, often leading current LLMs to produce low-entropy response distributions and thus to struggle at displaying the diversity that inference-time search will require. We propose Vector Policy Optimization (VPO), an RL algorithm that explicitly trains policies to anticipate diverse downstream reward functions and to produce diverse solutions. VPO exploits that rewards are often vector-valued in practice, like per-test-case correctness in code generation or, say, multiple different user personas or reward models. VPO is essentially a drop-in replacement for the GRPO advantage estimator, but it trains the LLM to output a set of solutions where individual solutions specialize to different trade-offs in the vector reward space. Across four tasks, VPO matches or beats the strongest scalar RL baselines on test-time search (e.g. pass@k and best@k), with the gap widening as the search budget grows. For evolutionary search, VPO models unlock problems that GRPO models cannot solve at all. As test-time search becomes more standardized, optimizing for diversity may need to become the default post-training objective.",
        "zh-CN": "提出VPO算法，通过向量奖励训练模型生成多样化解，提升测试时搜索性能，优于传统标量RL方法。",
        "ja": "言語モデルは、AlphaEvolveのような検索手順で多様なタスク固有報酬関数を持つロールアウトを選択する必要がある。VPOは、多様な後続報酬関数を予測し、多様な解決策を生成する方針を明示的にトレーニングする。",
        "ko": "언어 모델은 새로운 환경에 즉시 일반화해야 하며, AlphaEvolve와 같은 검색 절차에서 다양성을 요구한다. 현재 LLM은 낮은 엔트로피 응답 분포를 생성해 어려움을 겪는다. VPO는 다양성을 훈련하는 강화 학습 알고리즘이다.",
        "es": "Los modelos de lenguaje deben generalizar fuera de la caja para entornos nuevos y trabajar dentro de procedimientos de búsqueda escalados por inferencia, como AlphaEvolve.",
        "de": "Sprachmodelle müssen nun außerhalb des Kastens generalisieren, um neue Umgebungen zu bewältigen und in Inferenz-Skalierungs-Suchverfahren wie AlphaEvolve zu funktionieren, die Rollouts mit verschiedenen Aufgaben-spezifischen Belohnungsfunktionen wählen. Standardmäßig optimiert das Post-Training einer LLM eine vordefinierte Skalarbelohnung, was oft zu niedrig-Entropie Antwortverteilungen führt und somit Schwierigkeiten bereitet, die Vielfalt zu zeigen, die die Suche auf Testzeit erfordert. Wir schlagen Vector Policy Optimization (VPO) vor, einen RL-Algorithmus, der Policies explizit trainiert, um verschiedene nachfolgende Belohnungsfunktionen zu antizipieren und diverse Lösungen zu produzieren. VPO nutzt aus, dass Belohnungen in der Praxis oft vektorwertig sind, wie z.B. pro-Testfall-Korrektheit bei Codegenerierung oder verschiedene Benutzerpersönlichkeiten oder Belohnungsmodelle. VPO ist im Grunde ein Drop-in-Ersatz für den GRPO Advantage-Estimator, aber es trainiert das LLM, um eine Reihe von Lösungen zu liefern, bei denen einzelne Lösungen sich auf unterschiedliche Handlungsspielräume im Vektor-Belohnungsraum spezialisieren. In vier Aufgaben erreicht VPO oder übertrifft die stärksten skalaren RL-Baselines bei Testzeit-Suche (z.B. pass@k und best@k), wobei der Abstand mit wachsendem Suchbudget zunimmt. Für evolutionäre Suche öffnet VPO Probleme, die GRPO-Modelle nicht lösen können. Da die Testzeit-Suche standardisiert wird, könnte die Optimierung für Vielfalt zum Standard-Post-Training-Ziel werden."
      }
    },
    {
      "arxivId": "2605.22785v1",
      "title": "Evaluating Commercial AI Chatbots as News Intermediaries",
      "summary": "AI chatbots are rapidly shaping how people encounter the news, yet no prior study has systematically measured how accurately these systems, with their proprietary search integrations and retrieval-synthesis pipelines, handle emerging facts across languages and regions. We present a 14-day (February 9-22, 2026) evaluation of six AI chatbots (Gemini 3 Flash and Pro, Grok 4, Claude 4.5 Sonnet, GPT-5 and GPT-4o mini) on 2,100 factual questions derived from same-day BBC News reporting across six regional services (US & Canada, Arabic, Afrique, Hindi, Russian, Turkish). The best systems achieve over 90% multiple-choice accuracy on questions about events reported hours earlier. The same systems, however, lose 11-13% under free-response evaluation, and 16-17% across the cohort. We further characterize three failure patterns. First, every model achieves its lowest accuracy on Hindi (79% vs. 89-91% elsewhere) and citations indicate an Anglophone retrieval bias (e.g., models answering Hindi queries cite English Wikipedia more than any Hindi outlet). Second, retrieval, not reasoning, failures drive over 70% of all errors. When models retrieve a correct source, they often extract the correct answer; the problem is to land on the right source in the first place. Third, models achieving 88-96% accuracy on well-formed questions drop to 19-70% when questions contain subtle false premises, with the most vulnerable model accepting fabricated facts 64% of the time. We also identify a detection-accuracy paradox: the best false-premise detector ranks second in adversarial accuracy (abstention rate), while a weaker detector ranks first, showing that premise detection and answer recovery are partially independent capabilities. Overall, these suggest that high accuracy can mask systematic regional inequity, near-total dependence on retrieval infrastructure, and vulnerability to imperfect queries real users pose.",
      "authors": [
        "Mirac Suzgun",
        "Emily Shen",
        "Federico Bianchi",
        "Alexander Spangher",
        "Thomas Icard",
        "Daniel E. Ho"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:42:07Z",
      "submittedAt": "2026-05-21T17:42:07Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22785v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22785v1",
      "hfUrl": "https://arxiv.org/abs/2605.22785v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.CL"
      ],
      "source": "arXiv cs.CL",
      "sourceType": "arxiv",
      "title_zh": "评估商业AI聊天机器人作为新闻中介的表现",
      "summary_zh": "测试六款AI聊天机器人在2100个事实性问题上的表现，发现其准确率在90%以上，但自由回答时下降11-17%，存在区域偏见和检索依赖问题。",
      "title_i18n": {
        "en": "Evaluating Commercial AI Chatbots as News Intermediaries",
        "zh-CN": "评估商业AI聊天机器人作为新闻中介的表现",
        "ja": "Evaluating Commercial AI Chatbots as News Intermediaries",
        "ko": "Evaluating Commercial AI Chatbots as News Intermediaries",
        "es": "Evaluando Chatbots de IA Comerciales como Intermediarios de Noticias",
        "de": "Evaluating Commercial AI Chatbots as News Intermediaries"
      },
      "summary_i18n": {
        "en": "AI chatbots are rapidly shaping how people encounter the news, yet no prior study has systematically measured how accurately these systems, with their proprietary search integrations and retrieval-synthesis pipelines, handle emerging facts across languages and regions. We present a 14-day (February 9-22, 2026) evaluation of six AI chatbots (Gemini 3 Flash and Pro, Grok 4, Claude 4.5 Sonnet, GPT-5 and GPT-4o mini) on 2,100 factual questions derived from same-day BBC News reporting across six regional services (US & Canada, Arabic, Afrique, Hindi, Russian, Turkish). The best systems achieve over 90% multiple-choice accuracy on questions about events reported hours earlier. The same systems, however, lose 11-13% under free-response evaluation, and 16-17% across the cohort. We further characterize three failure patterns. First, every model achieves its lowest accuracy on Hindi (79% vs. 89-91% elsewhere) and citations indicate an Anglophone retrieval bias (e.g., models answering Hindi queries cite English Wikipedia more than any Hindi outlet). Second, retrieval, not reasoning, failures drive over 70% of all errors. When models retrieve a correct source, they often extract the correct answer; the problem is to land on the right source in the first place. Third, models achieving 88-96% accuracy on well-formed questions drop to 19-70% when questions contain subtle false premises, with the most vulnerable model accepting fabricated facts 64% of the time. We also identify a detection-accuracy paradox: the best false-premise detector ranks second in adversarial accuracy (abstention rate), while a weaker detector ranks first, showing that premise detection and answer recovery are partially independent capabilities. Overall, these suggest that high accuracy can mask systematic regional inequity, near-total dependence on retrieval infrastructure, and vulnerability to imperfect queries real users pose.",
        "zh-CN": "测试六款AI聊天机器人在2100个事实性问题上的表现，发现其准确率在90%以上，但自由回答时下降11-17%，存在区域偏见和检索依赖问题。",
        "ja": "AIチャットボットはニュースの接触方法を急速に形作っているが、その正確性はまだ系統的に測定されていない。評価では、最高のシステムでも質問に対する正答率が低下している。",
        "ko": "AI 챗봇은 뉴스 접촉 방식을 빠르게 변화시키고 있으나, 정확도 측정은 미비하다. 14일간의 평가에서 일부 시스템은 90% 이상의 정확도를 보였으나, 자유 응답에서는 11-13% 감소했다.",
        "es": "Los chatbots de IA están moldeando cómo las personas encuentran noticias, pero ningún estudio previo ha medido sistemáticamente su precisión en hechos emergentes.",
        "de": "AI-Chatbots formen rasch, wie Menschen Nachrichten begegnen, doch keine vorherige Studie hat systematisch gemessen, wie genau diese Systeme, mit ihren proprietären Suchintegrationen und Retrieval-Synthese-Pipelines, neu auftauchende Fakten über Sprachen und Regionen behandeln. Wir präsentieren eine 14-tägige Bewertung (9. bis 22. Februar 2026) sechs AI-Chatbots (Gemini 3 Flash und Pro, Grok 4, Claude 4.5 Sonnet, GPT-5 und GPT-4o mini) anhand von 2.100 sachlichen Fragen, die aus derselben Tages-BBC-Nachrichtenberichten aus sechs regionalen Diensten (US & Kanada, Arabisch, Afrika, Hindi, Russisch, Türkisch) abgeleitet wurden. Die besten Systeme erreichen über 90 % Multiple-Choice-Genauigkeit bei Fragen zu Ereignissen, die Stunden zuvor berichtet wurden. Die gleichen Systeme verlieren jedoch 11–13 % bei freier Antwortbewertung und 16–17 % im Durchschnitt. Wir charakterisieren drei Fehlermuster. Erstens erreicht jedes Modell seine niedrigste Genauigkeit bei Hindi (79 % gegenüber 89–91 % sonst). Zitierungen deuten auf einen Anglizismus-Abhängigkeitsbias hin (z.B. Modelle, die Hindi-Fragen beantworten, zitieren häufiger englische Wikipedia als jede hindi-Ausgabe). Zweitens treiben Retrieval-Fehler, nicht Reasoning-Fehler, über 70 % aller Fehler. Wenn Modelle eine korrekte Quelle abrufen, extrahieren sie oft die korrekte Antwort; das Problem besteht darin, zunächst die richtige Quelle zu finden. Drittens sinken Modelle mit 88–96 % Genauigkeit bei gut formulierten Fragen auf 19–70 %, wenn Fragen subtile falsche Prämissen enthalten, wobei das empfindlichste Modell gefälschte Fakten 64 % der Zeit akzeptiert. Wir identifizieren auch ein Detektions-Genauigkeitsparadoxon: Der beste Falschprämisse-Detektor rangiert zweiten in adversarialer Genauigkeit (Verzichtquote), während ein schwächerer Detektor ersten rangiert, was zeigt, dass Prämissenerkennung und Antwortwiederherstellung teilweise unabhängige Fähigkeiten sind. 
Im Allgemeinen deuten diese darauf hin, dass hohe Genauigkeit systematische regionale Ungleichheit, nahezu vollständige Abhängigkeit von Retrieval-Infrastruktur und Empfindlichkeit gegenüber unvollkommenen Anfragen realer Nutzer verdecken kann."
      }
    },
    {
      "arxivId": "2605.22769v1",
      "title": "Understanding Data Temporality Impact on Large Language Models Pre-training",
      "summary": "Large language models (LLMs) are typically trained on shuffled corpora, yielding models whose knowledge is frozen at train time and whose temporal grounding remains poorly understood. In this work, we study the impact of pre-training dynamics on the acquisition of time-sensitive factual knowledge, focusing specifically on data ordering. Our main contributions are twofold. First, we introduce a comprehensive benchmark of over 7,000 temporally grounded questions and an evaluation protocol that enables analysis of whether models correctly associate facts with their corresponding time periods. Second, we pretrain 6B-parameter models on temporally ordered Common Crawl snapshots and compare them against standard shuffled pre-training. Our results show that sequentially trained models match shuffled baselines on general language understanding and common knowledge while consistently exhibiting more up-to-date and temporally precise knowledge. Temporally ordered pre-training yields improved factual freshness, while shuffled pre-training peaks on older data, possibly due to increased factual repetition. These findings, along with the release of our code at https://github.com/kyutai-labs/kairos , checkpoints, and datasets at https://huggingface.co/collections/kyutai/kairos provide a foundation for future research on continual learning for LLMs.",
      "authors": [
        "Pilchen Hippolyte",
        "Fabre Romain",
        "Signe Talla Franck",
        "Perez Patrick",
        "Grave Edouard"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:31:17Z",
      "submittedAt": "2026-05-21T17:31:17Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22769v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22769v1",
      "hfUrl": "https://arxiv.org/abs/2605.22769v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.CL",
        "cs.AI"
      ],
      "source": "arXiv cs.CL",
      "sourceType": "arxiv",
      "title_zh": "理解数据时间性对大语言模型预训练的影响",
      "summary_zh": "研究数据顺序对大语言模型时间敏感事实知识获取的影响，通过时间有序预训练提升事实新鲜度和时间精度。",
      "title_i18n": {
        "en": "Understanding Data Temporality Impact on Large Language Models Pre-training",
        "zh-CN": "理解数据时间性对大语言模型预训练的影响",
        "ja": "Large Language Models Pre-trainingにおけるデータ時系列性の影響の理解",
        "ko": "Understanding Data Temporality Impact on Large Language Models Pre-training"
      },
      "summary_i18n": {
        "en": "Large language models (LLMs) are typically trained on shuffled corpora, yielding models whose knowledge is frozen at train time and whose temporal grounding remains poorly understood. In this work, we study the impact of pre-training dynamics on the acquisition of time-sensitive factual knowledge, focusing specifically on data ordering. Our main contributions are twofold. First, we introduce a comprehensive benchmark of over 7,000 temporally grounded questions and an evaluation protocol that enables analysis of whether models correctly associate facts with their corresponding time periods. Second, we pretrain 6B-parameter models on temporally ordered Common Crawl snapshots and compare them against standard shuffled pre-training. Our results show that sequentially trained models match shuffled baselines on general language understanding and common knowledge while consistently exhibiting more up-to-date and temporally precise knowledge. Temporally ordered pre-training yields improved factual freshness, while shuffled pre-training peaks on older data, possibly due to increased factual repetition. These findings, along with the release of our code at https://github.com/kyutai-labs/kairos , checkpoints, and datasets at https://huggingface.co/collections/kyutai/kairos provide a foundation for future research on continual learning for LLMs.",
        "zh-CN": "研究数据顺序对大语言模型时间敏感事实知识获取的影响，通过时间有序预训练提升事实新鲜度和时间精度。",
        "ja": "LLMはシャッフルされたコーパスで訓練され、知識がトレーニング時のものに固定される。本研究では、データ順序が時間的知識取得に与える影響を調査し、7,000以上の時系列質問ベンチマークを導入する。結果から、時系列順序でのトレーニングにより最新かつ正確な知識が得られることを示す。",
        "ko": "This study explores how pre-training data ordering affects LLMs' temporal knowledge. It introduces a benchmark for evaluating time-sensitive facts and shows that temporally ordered training improves factual freshness."
      }
    },
    {
      "arxivId": "2605.22734v1",
      "title": "ChronoMedKG: A Temporally-Grounded Biomedical Knowledge Graph and Benchmark for Clinical Reasoning",
      "summary": "Biomedical knowledge graphs (KGs) treat disease associations as static facts, but temporal information is crucial for clinical reasoning, e.g., a symptom diagnostic of one disease at age 3 may imply a different disease at age 13. Existing KGs such as PrimeKG, Hetionet, and iKraph do not encode when a finding becomes clinically relevant over the course of a disease. This limits their usefulness for longitudinal clinical reasoning and retrieval augmentation. We introduce ChronoMedKG, a temporal biomedical knowledge graph that contains 460,497 evidence-linked triples (filtered from 13M raw extractions) covering 13,431 diseases. Each association is tied to temporal components like onset window or progression stage, which are backed by PMID-traceable evidence and a multi-signal credibility score. The graph is constructed through a disease-autonomous multi-agent pipeline in which multiple frontier LLMs independently extract knowledge from PubMed and PMC literature. Only those relations are kept that are supported by multi-model consensus, survive credibility filtering, as well as ontology alignment. ChronoMedKG scored 92.7% agreement against Orphadata and adds temporal grounding for 6,250 diseases absent from HPOA, Orphadata, and Phenopackets, including 1,657 Orphanet-coded rare diseases. We further introduce ChronoTQA, a benchmark of 3,341 questions across eight task types (six temporal plus two static controls), with a 12-question supplementary probe. Frontier LLMs lose roughly 30 points moving from static to temporal questions; ChronoMedKG retrieval rescues 47-65% of their long-tail failures, against 17-29% for HPOA-RAG. As such, ChronoMedKG provides a crucial temporal axis for retrieval-augmented clinical systems that was previously absent.",
      "authors": [
        "Md Shamim Ahmed",
        "Farzaneh Firoozbakht",
        "Lukas Galke Poech",
        "Jan Baumbach",
        "Richard Röttger"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:04:28Z",
      "submittedAt": "2026-05-21T17:04:28Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22734v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22734v1",
      "hfUrl": "https://arxiv.org/abs/2605.22734v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.CL"
      ],
      "source": "arXiv cs.CL",
      "sourceType": "arxiv",
      "title_zh": "ChronoMedKG：一个时间锚定的生物医学知识图谱与临床推理基准",
      "summary_zh": "构建了包含46万条证据关联三元组的时间生物医学知识图谱ChronoMedKG，提升临床推理的时序准确性。",
      "title_i18n": {
        "en": "ChronoMedKG: A Temporally-Grounded Biomedical Knowledge Graph and Benchmark for Clinical Reasoning",
        "zh-CN": "ChronoMedKG：一个时间锚定的生物医学知识图谱与临床推理基准",
        "ja": "ChronoMedKG: 臨床推論用の時系列に基づく生物医学知識グラフとベンチマーク",
        "ko": "ChronoMedKG: A Temporally-Grounded Biomedical Knowledge Graph and Benchmark for Clinical Reasoning"
      },
      "summary_i18n": {
        "en": "Biomedical knowledge graphs (KGs) treat disease associations as static facts, but temporal information is crucial for clinical reasoning, e.g., a symptom diagnostic of one disease at age 3 may imply a different disease at age 13. Existing KGs such as PrimeKG, Hetionet, and iKraph do not encode when a finding becomes clinically relevant over the course of a disease. This limits their usefulness for longitudinal clinical reasoning and retrieval augmentation. We introduce ChronoMedKG, a temporal biomedical knowledge graph that contains 460,497 evidence-linked triples (filtered from 13M raw extractions) covering 13,431 diseases. Each association is tied to temporal components like onset window or progression stage, which are backed by PMID-traceable evidence and a multi-signal credibility score. The graph is constructed through a disease-autonomous multi-agent pipeline in which multiple frontier LLMs independently extract knowledge from PubMed and PMC literature. Only those relations are kept that are supported by multi-model consensus, survive credibility filtering, as well as ontology alignment. ChronoMedKG scored 92.7% agreement against Orphadata and adds temporal grounding for 6,250 diseases absent from HPOA, Orphadata, and Phenopackets, including 1,657 Orphanet-coded rare diseases. We further introduce ChronoTQA, a benchmark of 3,341 questions across eight task types (six temporal plus two static controls), with a 12-question supplementary probe. Frontier LLMs lose roughly 30 points moving from static to temporal questions; ChronoMedKG retrieval rescues 47-65% of their long-tail failures, against 17-29% for HPOA-RAG. As such, ChronoMedKG provides a crucial temporal axis for retrieval-augmented clinical systems that was previously absent.",
        "zh-CN": "构建了包含46万条证据关联三元组的时间生物医学知识图谱ChronoMedKG，提升临床推理的时序准确性。",
        "ja": "現存する医療知識グラフは時間情報を含まないため、長期的な臨床推論に不向きである。ChronoMedKGは460,497の証拠付き三項を含み、6,250の疾患に時系列情報を追加する。ChronoTQAは8つのタスクタイプを持つベンチマークで、LLMの性能向上に寄与する。",
        "ko": "ChronoMedKG is a biomedical knowledge graph with temporal information, enhancing clinical reasoning. It provides temporal grounding for many diseases and improves retrieval in medical systems."
      }
    },
    {
      "arxivId": "2605.22820v1",
      "title": "Integrable Elasticity via Neural Demand Potentials",
      "summary": "We propose the Integrable Context-Dependent Demand Network (ICDN), a demand-first neural model for multiproduct retail demand. The model learns log-demand as a smooth, context-conditioned function of log-prices, allowing elasticities to be derived exactly from the learned demand surface. On the Dominick's beer dataset, ICDN improves out-of-sample generalization over a directed log-log benchmark and yields more stable, economically plausible elasticity estimates, especially for weakly identified cross-price effects.",
      "authors": [
        "Carlos Heredia",
        "Daniel Roncel"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:59:47Z",
      "submittedAt": "2026-05-21T17:59:47Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22820v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22820v1",
      "hfUrl": "https://arxiv.org/abs/2605.22820v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.LG"
      ],
      "source": "arXiv cs.LG",
      "sourceType": "arxiv",
      "title_zh": "通过神经需求势的可积弹性",
      "summary_zh": "提出ICDN模型，基于日志价格学习需求函数，精确计算弹性，提升啤酒数据集的泛化能力和经济合理性",
      "title_i18n": {
        "en": "Integrable Elasticity via Neural Demand Potentials",
        "zh-CN": "通过神经需求势的可积弹性",
        "ja": "Neural Demand Potentialsによる統合的弾性",
        "ko": "Integrable Elasticity via Neural Demand Potentials"
      },
      "summary_i18n": {
        "en": "We propose the Integrable Context-Dependent Demand Network (ICDN), a demand-first neural model for multiproduct retail demand. The model learns log-demand as a smooth, context-conditioned function of log-prices, allowing elasticities to be derived exactly from the learned demand surface. On the Dominick's beer dataset, ICDN improves out-of-sample generalization over a directed log-log benchmark and yields more stable, economically plausible elasticity estimates, especially for weakly identified cross-price effects.",
        "zh-CN": "提出ICDN模型，基于日志价格学习需求函数，精确计算弹性，提升啤酒数据集的泛化能力和经济合理性",
        "ja": "ICDNは複数製品小売需要をモデル化する需要優先のニューラルモデルであり、需要曲面から弾性を正確に導出できる。Dominick'sビールデータセットで、より安定した弾性推定が得られる。",
        "ko": "The Integrable Context-Dependent Demand Network (ICDN) models retail demand, enabling exact elasticity calculations from learned demand surfaces, improving generalization and economic plausibility."
      }
    },
    {
      "arxivId": "2605.22814v1",
      "title": "Remember to be Curious: Episodic Context and Persistent Worlds for 3D Exploration",
      "summary": "Exploration is a prerequisite for learning useful behaviors in sparse-reward, long-horizon tasks, particularly within 3D environments. Curiosity-driven reinforcement learning addresses this via intrinsic rewards derived from the mismatch between the agent's predictive model of the world and reality. However, translating this intrinsic motivation to complex, photorealistic environments remains difficult, as agents can become trapped in local loops and receive fresh rewards for revisiting forgotten states. In this work, we demonstrate that this failure stems from a lack of spatial persistence and episodic context. We show that effective curiosity requires a model of the world that is persistent and continuously updated, paired with an agent that maintains an episodic trajectory history to navigate toward novel regions. We achieve this using an online 3D reconstruction as a persistent model of the world, while the agent policy is parameterized as a sequence model over RGB observations to maintain episodic context. This design enables effective exploration during training while allowing the agent to navigate using solely RGB frames at deployment. Trained purely via curiosity on HM3D, our agent outperforms RL-based active mapping baselines and generalizes zero-shot to Gibson and AI-generated worlds. Our end-to-end policy enables efficient adaptation to downstream tasks, such as apple picking and image-goal navigation, outperforming from-scratch baselines. Please see video results at https://recuriosity.github.io/.",
      "authors": [
        "Lily Goli",
        "Justin Kerr",
        "Daniele Reda",
        "Alec Jacobson",
        "Andrea Tagliasacchi",
        "Angjoo Kanazawa"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:58:06Z",
      "submittedAt": "2026-05-21T17:58:06Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22814v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22814v1",
      "hfUrl": "https://arxiv.org/abs/2605.22814v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.LG"
      ],
      "source": "arXiv cs.LG",
      "sourceType": "arxiv",
      "title_zh": "记住要好奇：用于3D探索的事件上下文与持久世界",
      "summary_zh": "基于GPT和LoRA方法，构建持久世界模型与事件轨迹历史，提升3D环境下的探索效率与泛化能力。",
      "title_i18n": {
        "en": "Remember to be Curious: Episodic Context and Persistent Worlds for 3D Exploration",
        "zh-CN": "记住要好奇：用于3D探索的事件上下文与持久世界",
        "ja": "好奇心を忘れるな：3D探索のためのエピソード的文脈と持続的世界",
        "ko": "Remember to be Curious: Episodic Context and Persistent Worlds for 3D Exploration"
      },
      "summary_i18n": {
        "en": "Exploration is a prerequisite for learning useful behaviors in sparse-reward, long-horizon tasks, particularly within 3D environments. Curiosity-driven reinforcement learning addresses this via intrinsic rewards derived from the mismatch between the agent's predictive model of the world and reality. However, translating this intrinsic motivation to complex, photorealistic environments remains difficult, as agents can become trapped in local loops and receive fresh rewards for revisiting forgotten states. In this work, we demonstrate that this failure stems from a lack of spatial persistence and episodic context. We show that effective curiosity requires a model of the world that is persistent and continuously updated, paired with an agent that maintains an episodic trajectory history to navigate toward novel regions. We achieve this using an online 3D reconstruction as a persistent model of the world, while the agent policy is parameterized as a sequence model over RGB observations to maintain episodic context. This design enables effective exploration during training while allowing the agent to navigate using solely RGB frames at deployment. Trained purely via curiosity on HM3D, our agent outperforms RL-based active mapping baselines and generalizes zero-shot to Gibson and AI-generated worlds. Our end-to-end policy enables efficient adaptation to downstream tasks, such as apple picking and image-goal navigation, outperforming from-scratch baselines. Please see video results at https://recuriosity.github.io/.",
        "zh-CN": "基于GPT和LoRA方法，构建持久世界模型与事件轨迹历史，提升3D环境下的探索效率与泛化能力。",
        "ja": "3D環境での有用な行動学習には探索が不可欠である。本研究では、空間的持続性とエピソード的文脈を用いて、効果的な探索を実現する。RGBフレームのみで動作するエージェントを構築し、実験で優れた性能を示す。",
        "ko": "This work enhances 3D exploration by combining persistent world models with episodic context, enabling agents to explore novel regions effectively using only RGB frames."
      }
    },
    {
      "arxivId": "2605.22800v1",
      "title": "The Matching Principle: A Geometric Theory of Loss Functions for Nuisance-Robust Representation Learning",
      "summary": "Robustness, domain adaptation, photometric and occlusion invariance, compositional generalisation, temporal robustness, alignment safety, and classical anisotropic regularisation are usually treated as separate problems with separate method families. This paper argues that much of their shared structure is one statistical problem: estimate the covariance of label-preserving deployment nuisance, then regularise the encoder Jacobian along a matrix whose range covers that covariance (the matching principle). CORAL, adversarial training, IRM, augmentation, metric learning, Jacobian penalties, and alignment-style constraints are different estimators of that object, not independent robustness tricks. In the linear-Gaussian model we prove closed-form optimality (Theorem A), including cube-root water-filling within the matched range; necessity of range coverage for quadratic Jacobian penalties (Theorem G); the same range dichotomy at deep global minima; and two falsification controls (Lemma C; Corollaries E), with seven conditional consistency lemmas (D1-D7) for estimation under standard identifiability assumptions. We introduce the Trajectory Deviation Index (TDI), a label-free probe of embedding sensitivity when task accuracy or Jacobian Frobenius norm is insufficient. Thirteen pre-registered blocks from classical ML through Qwen2.5-7B test the predicted matched, then isotropic, then wrong-W ordering on geometry and deployment drift; twelve pass, and the sole exception (Office-31) is an eigengap failure named before the run. At 7B scale, matched style-PMH improves selective honesty and preserves Style TDI where standard DPO degrades it. The contribution is naming the deployment nuisance covariance, stating what the regulariser must do, and supplying a closed-form falsifiable theory once that object is identified, not universality on every leaderboard.",
      "authors": [
        "Vishal Rajput"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:53:28Z",
      "submittedAt": "2026-05-21T17:53:28Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22800v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22800v1",
      "hfUrl": "https://arxiv.org/abs/2605.22800v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.LG",
        "cs.AI",
        "stat.ML"
      ],
      "source": "arXiv cs.LG",
      "sourceType": "arxiv",
      "title_zh": "匹配原理：一种针对干扰鲁棒表示学习的几何理论",
      "summary_zh": "提出匹配原理，通过估计干扰协方差并正则化编码器雅可比矩阵，统一多种鲁棒性方法，验证其在多个模型和数据集上的有效性。",
      "title_i18n": {
        "en": "The Matching Principle: A Geometric Theory of Loss Functions for Nuisance-Robust Representation Learning",
        "zh-CN": "匹配原理：一种针对干扰鲁棒表示学习的几何理论",
        "ja": "マッチング原理：ノイズに強い表現学習のための幾何学的損失関数理論",
        "ko": "The Matching Principle: A Geometric Theory of Loss Functions for Nuisance-Robust Representation Learning"
      },
      "summary_i18n": {
        "en": "Robustness, domain adaptation, photometric and occlusion invariance, compositional generalisation, temporal robustness, alignment safety, and classical anisotropic regularisation are usually treated as separate problems with separate method families. This paper argues that much of their shared structure is one statistical problem: estimate the covariance of label-preserving deployment nuisance, then regularise the encoder Jacobian along a matrix whose range covers that covariance (the matching principle). CORAL, adversarial training, IRM, augmentation, metric learning, Jacobian penalties, and alignment-style constraints are different estimators of that object, not independent robustness tricks. In the linear-Gaussian model we prove closed-form optimality (Theorem A), including cube-root water-filling within the matched range; necessity of range coverage for quadratic Jacobian penalties (Theorem G); the same range dichotomy at deep global minima; and two falsification controls (Lemma C; Corollaries E), with seven conditional consistency lemmas (D1-D7) for estimation under standard identifiability assumptions. We introduce the Trajectory Deviation Index (TDI), a label-free probe of embedding sensitivity when task accuracy or Jacobian Frobenius norm is insufficient. Thirteen pre-registered blocks from classical ML through Qwen2.5-7B test the predicted matched, then isotropic, then wrong-W ordering on geometry and deployment drift; twelve pass, and the sole exception (Office-31) is an eigengap failure named before the run. At 7B scale, matched style-PMH improves selective honesty and preserves Style TDI where standard DPO degrades it. The contribution is naming the deployment nuisance covariance, stating what the regulariser must do, and supplying a closed-form falsifiable theory once that object is identified, not universality on every leaderboard.",
        "zh-CN": "提出匹配原理，通过估计干扰协方差并正则化编码器雅可比矩阵，统一多种鲁棒性方法，验证其在多个模型和数据集上的有效性。",
        "ja": "ロバスト性、ドメイン適応、光度・遮蔽不変性などは統一的に扱われるべきである。本論文では、ラベル保持ノイズの共分散を推定し、エンコーダーのヤコビアンを正則化する「マッチング原理」を提案する。CORALや敵対的学習などは、この共分散に対する異なる推定器である。",
        "ko": "This paper unifies various robustness techniques under the matching principle, providing a geometric theory for loss functions that regularize representations against nuisance factors."
      }
    },
    {
      "arxivId": "2605.22795v1",
      "title": "Finite-Particle Convergence Rates for Conservative and Non-Conservative Drifting Models",
      "summary": "We propose and analyze a conservative drifting method for one-step generative modeling. The method replaces the original displacement-based drifting velocity by a kernel density estimator (KDE)-gradient velocity, namely the difference of the kernel-smoothed data score and the kernel-smoothed model score. This velocity is a gradient field, addressing the non-conservatism issue identified for general displacement-based drifting fields. We prove continuous-time finite-particle convergence bounds for the conservative method on $\\R^d$: a joint-entropy identity yields bounds for the empirical Stein drift, the smoothed Fisher discrepancy of the KDE, and the squared center velocity. The main finite-particle correction is a reciprocal-KDE self-interaction term, and we give deterministic and high-probability local-occupancy conditions under which this term is controlled. We keep the quadrature constants explicit and track their possible bandwidth dependence: the root residual-velocity rate $N^{-1/(d+4)}$ holds under an additional $h$-uniform quadrature regularity condition, while a more general growth condition yields the optimized root rate $N^{-(2-β)/(2(d+4-β))}$, where $0\\le β<2$. We also analyze the non-conservative drifting method with Laplace kernel, corresponding to the original displacement-based velocity proposed in~\\cite{deng2026drifting}. For this method, a sharp companion kernel decomposes the velocity into a positive scalar preconditioning of a sharp-score mismatch plus a Laplace scale-mismatch residual, producing an analogous finite-particle rate with an unavoidable residual term. Finally, we explain how the continuous-time residual-velocity bounds translate into one-step generation guarantees through the explicit drift size $η$.",
      "authors": [
        "Krishnakumar Balasubramanian"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T17:49:09Z",
      "submittedAt": "2026-05-21T17:49:09Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22795v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22795v1",
      "hfUrl": "https://arxiv.org/abs/2605.22795v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "stat.ML",
        "cs.AI",
        "cs.LG",
        "math.ST"
      ],
      "source": "arXiv cs.LG",
      "sourceType": "arxiv",
      "title_zh": "保守与非保守漂移模型的有限粒子收敛率",
      "summary_zh": "提出基于KDE梯度的保守漂移方法，证明其在R^d上的有限粒子收敛界，包含联合熵和自相互作用项，优化收敛率为N^{-(2-β)/(2(d+4-β))}。",
      "title_i18n": {
        "en": "Finite-Particle Convergence Rates for Conservative and Non-Conservative Drifting Models",
        "zh-CN": "保守与非保守漂移模型的有限粒子收敛率",
        "ja": "保存的および非保存的移動モデルの有限粒子収束速度",
        "ko": "Finite-Particle Convergence Rates for Conservative and Non-Conservative Drifting Models"
      },
      "summary_i18n": {
        "en": "We propose and analyze a conservative drifting method for one-step generative modeling. The method replaces the original displacement-based drifting velocity by a kernel density estimator (KDE)-gradient velocity, namely the difference of the kernel-smoothed data score and the kernel-smoothed model score. This velocity is a gradient field, addressing the non-conservatism issue identified for general displacement-based drifting fields. We prove continuous-time finite-particle convergence bounds for the conservative method on $\\R^d$: a joint-entropy identity yields bounds for the empirical Stein drift, the smoothed Fisher discrepancy of the KDE, and the squared center velocity. The main finite-particle correction is a reciprocal-KDE self-interaction term, and we give deterministic and high-probability local-occupancy conditions under which this term is controlled. We keep the quadrature constants explicit and track their possible bandwidth dependence: the root residual-velocity rate $N^{-1/(d+4)}$ holds under an additional $h$-uniform quadrature regularity condition, while a more general growth condition yields the optimized root rate $N^{-(2-β)/(2(d+4-β))}$, where $0\\le β<2$. We also analyze the non-conservative drifting method with Laplace kernel, corresponding to the original displacement-based velocity proposed in~\\cite{deng2026drifting}. For this method, a sharp companion kernel decomposes the velocity into a positive scalar preconditioning of a sharp-score mismatch plus a Laplace scale-mismatch residual, producing an analogous finite-particle rate with an unavoidable residual term. Finally, we explain how the continuous-time residual-velocity bounds translate into one-step generation guarantees through the explicit drift size $η$.",
        "zh-CN": "提出基于KDE梯度的保守漂移方法，证明其在R^d上的有限粒子收敛界，包含联合熵和自相互作用项，优化收敛率为N^{-(2-β)/(2(d+4-β))}。",
        "ja": "1ステップ生成モデルのための保存的移動法を提案し、その収束速度を解析する。KDE勾配速度を用いることで、非保存性の問題を解決する。連続時間の収束境界を導出し、パラメータ依存性を評価する。",
        "ko": "This study analyzes convergence rates for conservative and non-conservative drifting methods in generative modeling, offering theoretical bounds and conditions for controlled particle behavior."
      }
    },
    {
      "arxivId": "2605.22724v1",
      "title": "Multiple Neural Operators Achieve Near-Optimal Rates for Multi-Task Learning",
      "summary": "We study the approximation and statistical complexity of learning collections of operators in a shared multi-task setting, with a focus on the Multiple Neural Operators (MNO) architecture. For broad classes of Lipschitz multiple operator maps, we derive near-optimal upper bounds for approximation and statistical generalization. On the lower-bound side, we establish a curse of parametric complexity and prove corresponding minimax rates. Together, these results show that shared representations across tasks do not increase the overall cost: multi-task operator learning follows the same scaling laws as single operator learning. We also compare MNO with a multi-task extension of DeepONet based on concatenated task inputs and show that, from a worst-case approximation-complexity perspective, both architectures satisfy essentially the same asymptotic rates.",
      "authors": [
        "Adrien Weihs",
        "Hayden Schaeffer"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T16:57:33Z",
      "submittedAt": "2026-05-21T16:57:33Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22724v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22724v1",
      "hfUrl": "https://arxiv.org/abs/2605.22724v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.LG",
        "math.NA",
        "stat.ML"
      ],
      "source": "arXiv stat.ML",
      "sourceType": "arxiv",
      "title_zh": "多神经算子在多任务学习中实现近最优率",
      "summary_zh": "提出多神经算子(MNO)架构，证明其在Lipschitz算子映射下具有近最优逼近和泛化性能，验证多任务共享表示不增加整体复杂度。",
      "title_i18n": {
        "en": "Multiple Neural Operators Achieve Near-Optimal Rates for Multi-Task Learning",
        "zh-CN": "多神经算子在多任务学习中实现近最优率",
        "ja": "複数のニューラルオペレーターはマルチタスク学習で近似最適率を達成",
        "ko": "Multiple Neural Operators Achieve Near-Optimal Rates for Multi-Task Learning"
      },
      "summary_i18n": {
        "en": "We study the approximation and statistical complexity of learning collections of operators in a shared multi-task setting, with a focus on the Multiple Neural Operators (MNO) architecture. For broad classes of Lipschitz multiple operator maps, we derive near-optimal upper bounds for approximation and statistical generalization. On the lower-bound side, we establish a curse of parametric complexity and prove corresponding minimax rates. Together, these results show that shared representations across tasks do not increase the overall cost: multi-task operator learning follows the same scaling laws as single operator learning. We also compare MNO with a multi-task extension of DeepONet based on concatenated task inputs and show that, from a worst-case approximation-complexity perspective, both architectures satisfy essentially the same asymptotic rates.",
        "zh-CN": "提出多神经算子(MNO)架构，证明其在Lipschitz算子映射下具有近最优逼近和泛化性能，验证多任务共享表示不增加整体复杂度。",
        "ja": "共有マルチタスク設定におけるオペレーターの近似と統計的複雑性を研究し、MNOアーキテクチャを提案する。Lipschitz多重オペレーター写像に対して近似最適上界を導出する。",
        "ko": "The Multiple Neural Operators (MNO) architecture achieves near-optimal rates for multi-task learning, showing that shared representations do not increase overall complexity compared to single-task learning."
      }
    },
    {
      "arxivId": "2605.22579v1",
      "title": "Beyond Temperature: Hyperfitting as a Late-Stage Geometric Expansion",
      "summary": "Recent work has identified a counterintuitive phenomenon termed \"Hyperfitting\", where fine-tuning Large Language Models (LLMs) to near-zero training loss on small datasets surprisingly enhances open-ended generation quality and mitigates repetition in greedy decoding. While effective, the underlying mechanism remains poorly understood, with the extremely low-entropy output distributions suggesting a potential equivalence to simple temperature scaling. In this work, we demonstrate that this phenomenon is fundamentally distinct from distribution sharpening; entropy-matched control experiments reveal that temperature scaling fails to replicate the diversity gains of hyperfitting. Furthermore, we falsify the hypothesis of static vocabulary reweighting, showing through ablation studies that hyperfitting relies on a dynamic, context-dependent rank reordering mechanism. Layer-wise analysis localizes this effect to a \"Terminal Expansion\" in the final transformer block, where a substantial geometric expansion of the feature space (Delta Dim approx +80.8) facilitates the promotion of deep-tail tokens. Additionally, we introduce Late-Stage LoRA, a targeted fine-tuning strategy that updates only the final 5 layers, yielding robust generation with minimal parameter updates",
      "authors": [
        "Meimingwei Li",
        "Yuanhao Ding",
        "Esteban Garces Arias",
        "Christian Heumann"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T14:52:48Z",
      "submittedAt": "2026-05-21T14:52:48Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22579v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22579v1",
      "hfUrl": "https://arxiv.org/abs/2605.22579v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.CL",
        "cs.AI",
        "stat.ML"
      ],
      "source": "arXiv stat.ML",
      "sourceType": "arxiv",
      "title_zh": "超越温度：超拟合作为后期几何扩展",
      "summary_zh": "通过Late-Stage LoRA微调最后5层，实现特征空间几何扩展，提升生成质量与多样性。",
      "title_i18n": {
        "en": "Beyond Temperature: Hyperfitting as a Late-Stage Geometric Expansion",
        "zh-CN": "超越温度：超拟合作为后期几何扩展",
        "ja": "温度を超えて：ハイパーフィッティングは後期段階の幾何拡張である",
        "ko": "Beyond Temperature: Hyperfitting as a Late-Stage Geometric Expansion"
      },
      "summary_i18n": {
        "en": "Recent work has identified a counterintuitive phenomenon termed \"Hyperfitting\", where fine-tuning Large Language Models (LLMs) to near-zero training loss on small datasets surprisingly enhances open-ended generation quality and mitigates repetition in greedy decoding. While effective, the underlying mechanism remains poorly understood, with the extremely low-entropy output distributions suggesting a potential equivalence to simple temperature scaling. In this work, we demonstrate that this phenomenon is fundamentally distinct from distribution sharpening; entropy-matched control experiments reveal that temperature scaling fails to replicate the diversity gains of hyperfitting. Furthermore, we falsify the hypothesis of static vocabulary reweighting, showing through ablation studies that hyperfitting relies on a dynamic, context-dependent rank reordering mechanism. Layer-wise analysis localizes this effect to a \"Terminal Expansion\" in the final transformer block, where a substantial geometric expansion of the feature space (Delta Dim approx +80.8) facilitates the promotion of deep-tail tokens. Additionally, we introduce Late-Stage LoRA, a targeted fine-tuning strategy that updates only the final 5 layers, yielding robust generation with minimal parameter updates",
        "zh-CN": "通过Late-Stage LoRA微调最后5层，实现特征空间几何扩展，提升生成质量与多样性。",
        "ja": "ハイパーフィッティングは、小さなデータセットでLLMを微調整することで、生成品質を向上させる現象である。本研究では、これは分布の鋭さとは異なることを示し、動的なランク再順序付けに依存することを明らかにする。",
        "ko": "Hyperfitting, a phenomenon where fine-tuning LLMs improves generation quality, is shown to involve a geometric expansion in the final transformer layer, distinct from temperature scaling."
      }
    },
    {
      "arxivId": "2605.22507v1",
      "title": "Generative Modeling by Value-Driven Transport",
      "summary": "We propose a new framework for generative modeling based on a discrete-time stochastic control formulation of measure transport. Adapting classic results from control theory, we formulate our problem as a linear program whose dual variables correspond to the \\emph{optimal value function} of the control problem, which directly encodes the optimal control policy. Exploiting this LP formulation, we develop an efficient simulation-free primal-dual algorithm for computing approximately optimal value functions and the associated \\emph{value-driven transport} (VDT) policies which approximate the true optimal policy. We show that well-trained VDT policies enjoy numerous favorable properties in comparison with other state-of-the-art methods based on flows, diffusions, or Schrödinger bridges: they lead to straight transport paths which can be simulated quickly and robustly, and can be enhanced in all the same ways as diffusion and flow-based models (e.g., conditional generation, classifier-free guidance, unpaired data-to-data translation are all easy to incorporate). We evaluate our methodology in a range of experiments, with results that indicate strong performance and good potential for scalability.",
      "authors": [
        "Pablo Moreno-Muñoz",
        "Adrian Müller",
        "Gergely Neu"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T13:57:06Z",
      "submittedAt": "2026-05-21T13:57:06Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22507v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22507v1",
      "hfUrl": "https://arxiv.org/abs/2605.22507v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "cs.LG",
        "stat.ML"
      ],
      "source": "arXiv stat.ML",
      "sourceType": "arxiv",
      "title_zh": "基于价值驱动传输的生成建模",
      "summary_zh": "提出一种基于价值函数的生成建模框架，利用线性规划求解最优策略，实现高效价值驱动传输，性能优于流模型、扩散模型等方法。",
      "title_i18n": {
        "en": "Generative Modeling by Value-Driven Transport",
        "zh-CN": "基于价值驱动传输的生成建模",
        "ja": "Generative Modeling by Value-Driven Transport",
        "ko": "Generative Modeling by Value-Driven Transport",
        "es": "Modelado Generativo mediante Transporte Dirigido por Valor",
        "de": "Generative Modeling by Value-Driven Transport"
      },
      "summary_i18n": {
        "en": "We propose a new framework for generative modeling based on a discrete-time stochastic control formulation of measure transport. Adapting classic results from control theory, we formulate our problem as a linear program whose dual variables correspond to the \\emph{optimal value function} of the control problem, which directly encodes the optimal control policy. Exploiting this LP formulation, we develop an efficient simulation-free primal-dual algorithm for computing approximately optimal value functions and the associated \\emph{value-driven transport} (VDT) policies which approximate the true optimal policy. We show that well-trained VDT policies enjoy numerous favorable properties in comparison with other state-of-the-art methods based on flows, diffusions, or Schrödinger bridges: they lead to straight transport paths which can be simulated quickly and robustly, and can be enhanced in all the same ways as diffusion and flow-based models (e.g., conditional generation, classifier-free guidance, unpaired data-to-data translation are all easy to incorporate). We evaluate our methodology in a range of experiments, with results that indicate strong performance and good potential for scalability.",
        "zh-CN": "提出一种基于价值函数的生成建模框架，利用线性规划求解最优策略，实现高效价值驱动传输，性能优于流模型、扩散模型等方法。",
        "ja": "測度輸送の離散時間確率制御による定式化に基づく、生成モデリングの新しい枠組みを提案する。問題を線形計画として定式化し、その双対変数は最適方策を直接符号化する最適価値関数に対応する。",
        "ko": "새로운 생성 모델링 프레임워크를 제안합니다. 이는 측도 전송의 이산 시간 확률적 제어 형식을 기반으로 합니다.",
        "es": "Proponemos un marco nuevo para el modelado generativo basado en una formulación de control estocástico en tiempo discreto de transporte de medidas. Adaptamos resultados clásicos de la teoría del control, formulando nuestro problema como un programa lineal cuyas variables duales corresponden a la función de valor óptima del problema de control, que codifica directamente la política de control óptima. Aprovechando esta formulación LP, desarrollamos un algoritmo eficiente sin simulación para calcular funciones de valor aproximadamente óptimas y las políticas de transporte dirigido por valor (VDT) asociadas que aproximan la política óptima real. Mostramos que las políticas VDT bien entrenadas tienen numerosas propiedades favorables en comparación con otros métodos de vanguardia basados en flujos, difusiones o puentes de Schrödinger: conducen a caminos de transporte rectos que se pueden simular rápidamente y de manera robusta, y se pueden mejorar de la misma forma que los modelos basados en difusión y flujo (por ejemplo, generación condicional, guía sin clasificador, traducción datos-a-datos sin emparejamiento son fáciles de incorporar). Evaluamos nuestra metodología en una variedad de experimentos, con resultados que indican un buen rendimiento y un buen potencial de escalabilidad.",
        "de": "Wir schlagen einen neuen Rahmen für die generative Modellierung vor, der auf einer zeitdiskreten stochastischen Kontrollformulierung des Maßtransports basiert."
      }
    },
    {
      "arxivId": "2605.22438v1",
      "title": "Do Not Trust The Auctioneer: Learning to Bid in Feedback-Manipulated Auctions",
      "summary": "Shilling is the use of artificial bids to make competition appear stronger and push prices upward. We study repeated first-price auctions in which shilling affects feedback but not allocation: the learner wins or loses against the real competing bid, but after a loss observes the maximum of the real bid and an independent shill bid. Thus the manipulation changes what the learner observes and hence how it learns to bid, without changing the outcome of the current auction. We analyze regret with respect to the best bid benchmark, assuming that the shill-bid distribution is known. Even then, shilling can mask the real bid, while useful side information appears only through intermittent low-shill events. Our algorithm combines a robust interval-elimination branch, which ignores the shilled report and achieves the dynamic-pricing rate $\\tilde{\\mathcal{O}}(T^{2/3})$, with an optimistic branch that debiases losing-side reports and exploits the resulting suffix information when it is reliable and achieves the first-price auctions rate $\\tilde{\\mathcal{O}}(\\sqrt{T})$. A validation and racing procedure lets the algorithm use these optimistic updates without knowing the right scale or feedback geometry in advance. We complement the upper bounds with a matching lower bound, up to logarithmic factors, in the single-active-region case. Overall, the results show that even feedback-only shilling can sharply alter the statistical difficulty of repeated bidding.",
      "authors": [
        "Luigi Foscari",
        "Matilde Tullii",
        "Vianney Perchet"
      ],
      "organization": null,
      "publishedAt": "2026-05-21T13:06:55Z",
      "submittedAt": "2026-05-21T13:06:55Z",
      "upvotes": 0,
      "comments": 0,
      "thumbnail": null,
      "arxivUrl": "https://arxiv.org/abs/2605.22438v1",
      "pdfUrl": "https://arxiv.org/pdf/2605.22438v1",
      "hfUrl": "https://arxiv.org/abs/2605.22438v1",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "stat.ML",
        "cs.GT",
        "cs.LG"
      ],
      "source": "arXiv stat.ML",
      "sourceType": "arxiv",
      "title_zh": "不要相信拍卖师：在反馈被操控的拍卖中学习出价",
      "summary_zh": "研究反馈被操控拍卖中的出价学习，结合鲁棒区间消除和乐观分支方法，在已知假标分布下实现动态定价率O(T^{2/3})和首价拍卖率O(√T)。",
      "title_i18n": {
        "en": "Do Not Trust The Auctioneer: Learning to Bid in Feedback-Manipulated Auctions",
        "zh-CN": "不要相信拍卖师：在反馈被操控的拍卖中学习出价",
        "ja": "Do Not Trust The Auctioneer: Learning to Bid in Feedback-Manipulated Auctions",
        "ko": "Do Not Trust The Auctioneer: Learning to Bid in Feedback-Manipulated Auctions",
        "es": "No Confíes en el Subastador: Aprender a Ofertar en Subastas con Retroalimentación Manipulada",
        "de": "Do Not Trust The Auctioneer: Learning to Bid in Feedback-Manipulated Auctions"
      },
      "summary_i18n": {
        "en": "Shilling is the use of artificial bids to make competition appear stronger and push prices upward. We study repeated first-price auctions in which shilling affects feedback but not allocation: the learner wins or loses against the real competing bid, but after a loss observes the maximum of the real bid and an independent shill bid. Thus the manipulation changes what the learner observes and hence how it learns to bid, without changing the outcome of the current auction. We analyze regret with respect to the best bid benchmark, assuming that the shill-bid distribution is known. Even then, shilling can mask the real bid, while useful side information appears only through intermittent low-shill events. Our algorithm combines a robust interval-elimination branch, which ignores the shilled report and achieves the dynamic-pricing rate $\\tilde{\\mathcal{O}}(T^{2/3})$, with an optimistic branch that debiases losing-side reports and exploits the resulting suffix information when it is reliable and achieves the first-price auctions rate $\\tilde{\\mathcal{O}}(\\sqrt{T})$. A validation and racing procedure lets the algorithm use these optimistic updates without knowing the right scale or feedback geometry in advance. We complement the upper bounds with a matching lower bound, up to logarithmic factors, in the single-active-region case. Overall, the results show that even feedback-only shilling can sharply alter the statistical difficulty of repeated bidding.",
        "zh-CN": "研究反馈被操控拍卖中的出价学习，结合鲁棒区间消除和乐观分支方法，在已知假标分布下实现动态定价率O(T^{2/3})和首价拍卖率O(√T)。",
        "ja": "フィードバックが操作されたオークションにおける入札学習を研究し、シリング（サクラ入札）が学習に与える影響を分析する。",
        "ko": "우리는 피드백이 조작된 경매에서 입찰하는 방법을 배우는 것을 연구합니다. 이는 경매 결과에 영향을 주지 않고 관찰에만 영향을 줍니다.",
        "es": "El shilling es el uso de ofertas artificiales para hacer que la competencia parezca más fuerte y empujar los precios hacia arriba. Estudiamos subastas de primer precio repetidas en las que el shilling afecta la retroalimentación pero no la asignación: el aprendiz gana o pierde contra la oferta competidora real, pero después de una pérdida observa el máximo entre la oferta real y una oferta de shill independiente. Por lo tanto, la manipulación cambia lo que el aprendiz observa y, en consecuencia, cómo aprende a ofertar, sin cambiar el resultado de la subasta actual. Analizamos el arrepentimiento con respecto al mejor criterio de oferta, asumiendo que la distribución de ofertas de shill es conocida. Incluso entonces, el shilling puede ocultar la oferta real, mientras que la información útil aparece solo a través de eventos intermitentes de bajo shilling. Nuestro algoritmo combina una rama de eliminación de intervalos robusta, que ignora el informe de shill y alcanza la tasa de precios dinámicos $\\tilde{\\mathcal{O}}(T^{2/3})$, con una rama optimista que corrige el sesgo de los informes del lado perdedor, explota la información de sufijo resultante cuando es fiable y alcanza la tasa de subastas de primer precio $\\tilde{\\mathcal{O}}(\\sqrt{T})$. Un procedimiento de validación y carrera permite al algoritmo usar estas actualizaciones optimistas sin conocer la escala correcta ni la geometría de retroalimentación de antemano. Complementamos los límites superiores con un límite inferior coincidente, hasta factores logarítmicos, en el caso de una única región activa. En general, los resultados muestran que incluso el shilling que solo afecta a la retroalimentación puede alterar drásticamente la dificultad estadística de la oferta repetida.",
        "de": "Shilling ist die Verwendung künstlicher Gebote, um den Wettbewerb stärker erscheinen zu lassen und die Preise in die Höhe zu treiben."
      }
    },
    {
      "arxivId": "2605.19769",
      "title": "OpenComputer: Verifiable Software Worlds for Computer-Use Agents",
      "summary": "OpenComputer presents a framework for creating verifiable software environments for computer-use agents through integrated state verification, self-improving layers, task synthesis, and evaluation systems across multiple desktop applications.",
      "authors": [
        "Jinbiao Wei",
        "Qianran Ma",
        "Yilun Zhao",
        "Xiao Zhou",
        "Kangqi Ni",
        "Guo Gan"
      ],
      "organization": {
        "_id": "6532df27d690f3012efde84c",
        "name": "yale-nlp",
        "fullname": "Yale NLP Lab",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/65204db5b0e0d57453cb1809/9OAeiZ-BrN2g1h1yd6-1W.png"
      },
      "publishedAt": "2026-05-19T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 56,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.19769.png",
      "arxivUrl": "https://arxiv.org/abs/2605.19769",
      "pdfUrl": "https://arxiv.org/pdf/2605.19769.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.19769",
      "githubRepo": "https://github.com/echo0715/OpenComputer",
      "githubStars": 18,
      "keywords": [
        "verifier-grounded framework",
        "state verifiers",
        "self-evolving verification layer",
        "task-generation pipeline",
        "evaluation harness",
        "desktop applications"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "OpenComputer：用于计算机使用代理的可验证软件世界",
      "summary_zh": "构建可验证软件环境，通过状态验证和任务合成提升计算机代理性能",
      "title_i18n": {
        "en": "OpenComputer: Verifiable Software Worlds for Computer-Use Agents",
        "zh-CN": "OpenComputer：用于计算机使用代理的可验证软件世界",
        "ja": "OpenComputer: Verifiable Software Worlds for Computer-Use Agents",
        "ko": "OpenComputer: Verifiable Software Worlds for Computer-Use Agents",
        "es": "OpenComputer: Mundos de Software Verificables para Agentes de Uso Computacional",
        "de": "OpenComputer: Verifiable Software Worlds for Computer-Use Agents"
      },
      "summary_i18n": {
        "en": "OpenComputer presents a framework for creating verifiable software environments for computer-use agents through integrated state verification, self-improving layers, task synthesis, and evaluation systems across multiple desktop applications.",
        "zh-CN": "构建可验证软件环境，通过状态验证和任务合成提升计算机代理性能",
        "ja": "コンピュータ利用エージェントの検証可能なソフトウェア環境を作成するフレームワークを提示する。",
        "ko": "OpenComputer는 컴퓨터 사용 에이전트를 위한 검증 가능한 소프트웨어 환경을 만드는 프레임워크입니다.",
        "es": "OpenComputer presenta un marco para crear entornos de software verificables para agentes de uso computacional mediante verificación de estado integrada, capas auto-mejorantes, síntesis de tareas y sistemas de evaluación en múltiples aplicaciones de escritorio.",
        "de": "OpenComputer präsentiert einen Rahmen zur Erstellung verifizierbarer Softwareumgebungen für Computer-Nutzungsagenten."
      }
    },
    {
      "arxivId": "2605.16679",
      "title": "CHI-Bench: Can AI Agents Automate End-to-End, Long-Horizon, Policy-Rich Healthcare Workflows?",
      "summary": "Healthcare workflow benchmark challenges agents with policy-dense, multi-role, and multilateral interaction requirements, revealing significant performance gaps in automated enterprise applications.",
      "authors": [
        "Haolin Chen",
        "Deon Metelski",
        "Leon Qi",
        "Tao Xia",
        "Joonyul Lee",
        "Steve Brown"
      ],
      "organization": {
        "_id": "68edc38fec75faa72a18d292",
        "name": "actava",
        "fullname": "actAVA AI",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/68edc310a4f606a8123967e7/yn4FgSauqB_0VC1xYOgOf.png"
      },
      "publishedAt": "2026-05-15T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 49,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.16679.png",
      "arxivUrl": "https://arxiv.org/abs/2605.16679",
      "pdfUrl": "https://arxiv.org/pdf/2605.16679.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.16679",
      "githubRepo": "https://github.com/actava-ai/chi-bench",
      "githubStars": 24,
      "keywords": [
        "healthcare operations",
        "policy density",
        "multi-role composition",
        "multilateral interaction",
        "long-horizon workflows",
        "clinical cases"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "CHI-Bench：AI代理能否自动化长周期、多策略医疗流程？",
      "summary_zh": "构建CHI-Bench基准，评估AI代理在多角色、多策略医疗流程中的端到端自动化能力。",
      "title_i18n": {
        "en": "CHI-Bench: Can AI Agents Automate End-to-End, Long-Horizon, Policy-Rich Healthcare Workflows?",
        "zh-CN": "CHI-Bench：AI代理能否自动化长周期、多策略医疗流程？",
        "ja": "CHI-Bench: Can AI Agents Automate End-to-End, Long-Horizon, Policy-Rich Healthcare Workflows?",
        "ko": "CHI-Bench: Can AI Agents Automate End-to-End, Long-Horizon, Policy-Rich Healthcare Workflows?",
        "es": "CHI-Bench: ¿Pueden los Agentes de IA Automatizar Flujos de Trabajo de Salud de Alto Horizonte y de Políticas Ricas?",
        "de": "CHI-Bench: Can AI Agents Automate End-to-End, Long-Horizon, Policy-Rich Healthcare Workflows?"
      },
      "summary_i18n": {
        "en": "Healthcare workflow benchmark challenges agents with policy-dense, multi-role, and multilateral interaction requirements, revealing significant performance gaps in automated enterprise applications.",
        "zh-CN": "构建CHI-Bench基准，评估AI代理在多角色、多策略医疗流程中的端到端自动化能力。",
        "ja": "医療ワークフローのベンチマークでエージェントの性能ギャップを明らかにする。",
        "ko": "건강관리 워크플로우 벤치마크는 정책 밀도가 높은 작업을 자동화할 수 있는지 시험합니다.",
        "es": "El desafío de benchmark de CHI pone a prueba a los agentes con requisitos de interacción multi-rol y multilateral densos en políticas, revelando grandes brechas de rendimiento en aplicaciones empresariales automatizadas.",
        "de": "Gesundheitsworkflow-Benchmark-Herausforderungen für Agenten mit politisch dichten, mehreren Rollen und multilateralen Interaktionsanforderungen."
      }
    },
    {
      "arxivId": "2605.22012",
      "title": "LatentOmni: Rethinking Omni-Modal Understanding via Unified Audio-Visual Latent Reasoning",
      "summary": "LatentOmni is a cross-modal reasoning framework that interleaves textual reasoning with audio-visual latent states using feature-level supervision and temporal consistency embedding, outperforming explicit text-based chain-of-thought approaches in audio-visual reasoning tasks.",
      "authors": [
        "Yifan Dai",
        "Zhenhua Wu",
        "Bohan Zeng",
        "Daili Hua",
        "Jialing Liu",
        "Bozhou Li"
      ],
      "organization": {
        "_id": "662c559b322afcbae51b3c8b",
        "name": "KlingTeam",
        "fullname": "Kling Team",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/60e272ca6c78a8c122b12127/ZQV1aKLUDPf2rUcxxAqj6.jpeg"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 37,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.22012.png",
      "arxivUrl": "https://arxiv.org/abs/2605.22012",
      "pdfUrl": "https://arxiv.org/pdf/2605.22012.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.22012",
      "githubRepo": "https://github.com/yfanDai/LatentOmni",
      "githubStars": 7,
      "keywords": [
        "multimodal large language models",
        "chain-of-thought",
        "latent space",
        "cross-modal reasoning",
        "feature-level supervision",
        "Omni-Sync Position Embedding"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "LatentOmni：通过统一音视频潜在推理重新思考多模态理解",
      "summary_zh": "LatentOmni通过特征级监督和时间一致性嵌入，将文本推理与音视频潜在状态交织，在音视频推理任务中优于基于显式文本的思维链方法。",
      "title_i18n": {
        "en": "LatentOmni: Rethinking Omni-Modal Understanding via Unified Audio-Visual Latent Reasoning",
        "zh-CN": "LatentOmni：通过统一音视频潜在推理重新思考多模态理解",
        "ja": "LatentOmni: Rethinking Omni-Modal Understanding via Unified Audio-Visual Latent Reasoning",
        "ko": "LatentOmni: Rethinking Omni-Modal Understanding via Unified Audio-Visual Latent Reasoning",
        "es": "LatentOmni: Revisando la Comprensión Omni-Modal mediante Razonamiento Latente Unificado Audiovisual",
        "de": "LatentOmni: Rethinking Omni-Modal Understanding via Unified Audio-Visual Latent Reasoning"
      },
      "summary_i18n": {
        "en": "LatentOmni is a cross-modal reasoning framework that interleaves textual reasoning with audio-visual latent states using feature-level supervision and temporal consistency embedding, outperforming explicit text-based chain-of-thought approaches in audio-visual reasoning tasks.",
        "zh-CN": "LatentOmni通过特征级监督和时间一致性嵌入，将文本推理与音视频潜在状态交织，在音视频推理任务中优于基于显式文本的思维链方法。",
        "ja": "音声・視覚の潜在的推論を統合したマルチモーダル理解の枠組みを提案する。",
        "ko": "LatentOmni는 통합된 오디오-비주얼 잠재 추론을 통해 다모달 이해를 재고합니다.",
        "es": "LatentOmni es un marco de razonamiento transmodal que intercala razonamiento textual con estados latentes audiovisuales usando supervisión a nivel de características y embebidos de consistencia temporal, superando en rendimiento a enfoques basados en cadena de pensamiento explícita en tareas de razonamiento audiovisual.",
        "de": "LatentOmni ist ein Rahmen für intermodalen Denkprozess, der textuelle und audiovisuelle latente Zustände kombiniert."
      }
    },
    {
      "arxivId": "2605.22681",
      "title": "Forecasting Scientific Progress with Artificial Intelligence",
      "summary": "Current AI systems demonstrate limited capability in predicting scientific progress, showing inconsistent performance across domains and systematic overconfidence in forecasts.",
      "authors": [
        "Sean Wu",
        "Pan Lu",
        "Yupeng Chen",
        "Jonathan Bragg",
        "Yutaro Yamada",
        "Peter Clark"
      ],
      "organization": {
        "_id": "627bbc28fbab61b048eba8b6",
        "name": "Oxford",
        "fullname": "University of Oxford",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/68e396f2b5bb631e9b2fac9a/u0ey2LfYu6uG6iu8m_kH7.png"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 31,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.22681.png",
      "arxivUrl": "https://arxiv.org/abs/2605.22681",
      "pdfUrl": "https://arxiv.org/pdf/2605.22681.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.22681",
      "githubRepo": "https://github.com/SeanWu25/cusp-scientific-foresight",
      "githubStars": 14,
      "keywords": [
        "scientific forecasting",
        "artificial intelligence",
        "scientific progress",
        "temporal prediction",
        "mechanistic reasoning",
        "generative solution design"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "用人工智能预测科学进展",
      "summary_zh": "使用AI预测科学进展，发现其在不同领域表现不一致且存在系统性过度自信",
      "title_i18n": {
        "en": "Forecasting Scientific Progress with Artificial Intelligence",
        "zh-CN": "用人工智能预测科学进展",
        "ja": "Forecasting Scientific Progress with Artificial Intelligence",
        "ko": "Forecasting Scientific Progress with Artificial Intelligence",
        "es": "Predecir el Progreso Científico con Inteligencia Artificial",
        "de": "Forecasting Scientific Progress with Artificial Intelligence"
      },
      "summary_i18n": {
        "en": "Current AI systems demonstrate limited capability in predicting scientific progress, showing inconsistent performance across domains and systematic overconfidence in forecasts.",
        "zh-CN": "使用AI预测科学进展，发现其在不同领域表现不一致且存在系统性过度自信",
        "ja": "現在のAIシステムは科学的進歩の予測に限られた能力を持ち、領域間で不一致なパフォーマンスを示す。",
        "ko": "현재 AI 시스템은 과학적 진보 예측에 한계가 있으며, 다양한 분야에서 일관되지 않은 성능을 보입니다.",
        "es": "Los sistemas actuales de IA demuestran capacidad limitada para predecir el progreso científico, mostrando un rendimiento inconsistente entre dominios y una sobreconfianza sistemática en las predicciones.",
        "de": "Aktuelle KI-Systeme zeigen begrenzte Fähigkeit, wissenschaftlichen Fortschritt vorzusagen, mit unregelmäßiger Leistung über Domänen hinweg."
      }
    },
    {
      "arxivId": "2605.20613",
      "title": "HRM-Text: Efficient Pretraining Beyond Scaling",
      "summary": "A Hierarchical Recurrent Model architecture with specialized training on instruction-response pairs achieves competitive language modeling performance with significantly reduced computational requirements compared to traditional Transformer-based approaches.",
      "authors": [
        "Guan Wang",
        "Changling Liu",
        "Chenyu Wang",
        "Cai Zhou",
        "Yuhao Sun",
        "Yifei Wu"
      ],
      "organization": {
        "_id": "682c3fa004d9cd55c3fc728a",
        "name": "sapientinc",
        "fullname": "Sapient AI",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/61b6cbbdbfb266841ec0f24a/8H2aKoS6VwKLVd7psUudO.png"
      },
      "publishedAt": "2026-05-20T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 18,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.20613.png",
      "arxivUrl": "https://arxiv.org/abs/2605.20613",
      "pdfUrl": "https://arxiv.org/pdf/2605.20613.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.20613",
      "githubRepo": "https://github.com/sapientinc/HRM-Text",
      "githubStars": 650,
      "keywords": [
        "Hierarchical Recurrent Model",
        "Transformers",
        "deep recurrence",
        "MagicNorm",
        "warmup deep credit assignment",
        "instruction-response pairs"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "HRM-Text：超越扩展的高效预训练",
      "summary_zh": "提出HRM-Text模型，在指令-响应对上进行分层训练，显著降低计算需求并实现竞争性语言建模性能",
      "title_i18n": {
        "en": "HRM-Text: Efficient Pretraining Beyond Scaling",
        "zh-CN": "HRM-Text：超越扩展的高效预训练",
        "ja": "HRM-Text: Efficient Pretraining Beyond Scaling",
        "ko": "HRM-Text: Efficient Pretraining Beyond Scaling",
        "es": "HRM-Text: Preentrenamiento Eficiente Más Allá de la Escalabilidad",
        "de": "HRM-Text: Efficient Pretraining Beyond Scaling"
      },
      "summary_i18n": {
        "en": "A Hierarchical Recurrent Model architecture with specialized training on instruction-response pairs achieves competitive language modeling performance with significantly reduced computational requirements compared to traditional Transformer-based approaches.",
        "zh-CN": "提出HRM-Text模型，在指令-响应对上进行分层训练，显著降低计算需求并实现竞争性语言建模性能",
        "ja": "階層的な再帰モデルは、従来のトランスフォーマーと比較して計算要件を大幅に削減しながら競争力のある言語モデリング性能を達成する。",
        "ko": "HRM-Text는 전통적인 Transformer 기반 접근보다 훨씬 적은 계산 자원으로 경쟁력 있는 언어 모델링 성능을 달성합니다.",
        "es": "Una arquitectura de Modelo Recurrente Jerárquico con entrenamiento especializado en pares instrucción-respuesta logra un rendimiento competitivo en modelado de lenguaje con requerimientos computacionales significativamente reducidos en comparación con enfoques basados en Transformers tradicionales.",
        "de": "Eine hierarchische rekursive Modellarchitektur mit spezialisiertem Training auf Anweisung-Antwort-Paaren erreicht konkurrenzfähige Sprachmodellleistungen."
      }
    },
    {
      "arxivId": "2605.15529",
      "title": "Process Rewards with Learned Reliability",
      "summary": "BetaPRM introduces a distributional approach to process reward models that predicts both success probabilities and prediction reliability, enabling adaptive computation allocation that reduces token usage while maintaining accuracy.",
      "authors": [
        "Jinyuan Li",
        "Langlin Huang",
        "Chengsong Huang",
        "Shaoyang Xu",
        "Donghong Cai",
        "Yuyi Yang"
      ],
      "organization": {
        "_id": "670035f24055c4569f7dd024",
        "name": "HINT-lab",
        "fullname": "Huang's INTelligence lab",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/64efbf39b3610349e84db417/tbNZtAX3vJeGo2Rag_7ZN.png"
      },
      "publishedAt": "2026-05-15T00:00:00.000Z",
      "submittedAt": "2026-05-20T00:00:00.000Z",
      "upvotes": 51,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.15529.png",
      "arxivUrl": "https://arxiv.org/abs/2605.15529",
      "pdfUrl": "https://arxiv.org/pdf/2605.15529.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.15529",
      "githubRepo": "https://github.com/JinYuanLi0012/Beta-Binomial-PRM",
      "githubStars": 8,
      "keywords": [
        "Process Reward Models",
        "BetaPRM",
        "distributional PRM",
        "Beta belief",
        "Beta-Binomial likelihood",
        "Monte Carlo continuations"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "基于学习可靠性的过程奖励方法",
      "summary_zh": "BetaPRM通过预测成功率和可靠性，实现自适应计算分配，减少token使用同时保持准确性。",
      "title_i18n": {
        "en": "Process Rewards with Learned Reliability",
        "zh-CN": "基于学习可靠性的过程奖励方法",
        "ja": "Process Rewards with Learned Reliability",
        "ko": "Process Rewards with Learned Reliability",
        "es": "Recompensas de Proceso con Confiabilidad Aprendida",
        "de": "Process Rewards with Learned Reliability"
      },
      "summary_i18n": {
        "en": "BetaPRM introduces a distributional approach to process reward models that predicts both success probabilities and prediction reliability, enabling adaptive computation allocation that reduces token usage while maintaining accuracy.",
        "zh-CN": "BetaPRM通过预测成功率和可靠性，实现自适应计算分配，减少token使用同时保持准确性。",
        "ja": "BetaPRMは成功確率と信頼性を予測する分布的プロセス報酬モデルを導入し、トークン使用量を削減する。",
        "ko": "BetaPRM는 성공 확률과 예측 신뢰도를 예측하는 분포적 접근을 도입합니다.",
        "es": "BetaPRM introduce un enfoque distribucional para modelos de recompensa de proceso que predice probabilidades de éxito y confiabilidad de predicción, permitiendo la asignación adaptativa de cálculo que reduce el uso de tokens manteniendo la precisión.",
        "de": "BetaPRM führt einen verteilten Ansatz für Prozessbelohnungsmodelle ein, der Erfolgswahrscheinlichkeiten und Vorhersagezuverlässigkeit vorhersagt."
      }
    },
    {
      "arxivId": "2605.21850",
      "title": "ACC: Compiling Agent Trajectories for Long-Context Training",
      "summary": "Agent Context Compilation (ACC) enhances long-context reasoning in LLMs by converting multi-turn agent trajectories into structured QA pairs, enabling direct supervision of distant context integration without additional annotation.",
      "authors": [
        "Qisheng Su",
        "Zhen Fang",
        "Shiting Huang",
        "Yu Zeng",
        "Yiming Zhao",
        "Kou Shi"
      ],
      "organization": {
        "_id": "67ff908ff0f413c693b7cd0c",
        "name": "ustc-community",
        "fullname": "University of Science and Technology of China",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/660c2d134ba2fcc848b03e21/j_f3uYYIFPH_4WJH9fKel.png"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 54,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.21850.png",
      "arxivUrl": "https://arxiv.org/abs/2605.21850",
      "pdfUrl": "https://arxiv.org/pdf/2605.21850.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.21850",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "long-context reasoning",
        "agent SFT",
        "tool responses",
        "environment observations",
        "trajectory conversion",
        "long-range dependency modeling"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "ACC：用于长上下文训练的智能体轨迹编译",
      "summary_zh": "通过将多轮智能体轨迹转化为结构化问答对，提升大语言模型的长上下文推理能力",
      "title_i18n": {
        "en": "ACC: Compiling Agent Trajectories for Long-Context Training",
        "zh-CN": "ACC：用于长上下文训练的智能体轨迹编译",
        "ja": "ACC: Long-Context Trainingのためのエージェントトレースのコンパイル",
        "ko": "ACC: Compiling Agent Trajectories for Long-Context Training",
        "es": "ACC: Compiling Agent Trajectories for Long-Context Training"
      },
      "summary_i18n": {
        "en": "Agent Context Compilation (ACC) enhances long-context reasoning in LLMs by converting multi-turn agent trajectories into structured QA pairs, enabling direct supervision of distant context integration without additional annotation.",
        "zh-CN": "通过将多轮智能体轨迹转化为结构化问答对，提升大语言模型的长上下文推理能力",
        "ja": "ACCは、マルチターンエージェントトレースを構造化されたQAペアに変換し、追加のアノテーションなしで遠距離文脈の統合を直接的に指導する。",
        "ko": "ACC는 다중 대화 트래잭터리를 구조화된 QA 쌍으로 변환하여 LLM의 장기적 맥락 추론을 향상시킵니다.",
        "es": "ACC mejora el razonamiento de contexto largo en LLMs convirtiendo trayectorias de agentes en pares QA estructurados, permitiendo supervisión directa sin anotación adicional."
      }
    },
    {
      "arxivId": "2605.19660",
      "title": "OScaR: The Occam's Razor for Extreme KV Cache Quantization in LLMs and Beyond",
      "summary": "OScaR is a novel KV cache compression framework that addresses token norm imbalance through canalized rotation and omni-token scaling, achieving significant improvements in memory efficiency and decoding speed for extended context language models.",
      "authors": [
        "Zunhai Su",
        "Rui Yang",
        "Chao Zhang",
        "Yaxiu Liu",
        "Yifan Zhang",
        "Wei Wu"
      ],
      "organization": {
        "_id": "67ea9ecfc234715db8dbf339",
        "name": "hkuhk",
        "fullname": "The University of Hong Kong",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ea9e8d2d95c10a0da11b0c/FNnR4M7YqKRuG43N5771B.png"
      },
      "publishedAt": "2026-05-19T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 39,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.19660.png",
      "arxivUrl": "https://arxiv.org/abs/2605.19660",
      "pdfUrl": "https://arxiv.org/pdf/2605.19660.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.19660",
      "githubRepo": "https://github.com/ZunhaiSu/OScaR-KV-Quant",
      "githubStars": 23,
      "keywords": [
        "Key-Value cache",
        "quantization",
        "Token Norm Imbalance",
        "per-channel quantization",
        "X-LLMs",
        "Canalized Rotation"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "OScaR：极端KV缓存量化中的奥卡姆剃刀",
      "summary_zh": "OScaR通过通道旋转和全令牌缩放解决token范数不平衡，显著提升长上下文语言模型的内存效率和解码速度。",
      "title_i18n": {
        "en": "OScaR: The Occam's Razor for Extreme KV Cache Quantization in LLMs and Beyond",
        "zh-CN": "OScaR：极端KV缓存量化中的奥卡姆剃刀",
        "ja": "OScaR: LLMsおよびそれ以上の極端なKVキャッシュ量子化のオッカムの剃刀",
        "ko": "OScaR: The Occam's Razor for Extreme KV Cache Quantization in LLMs and Beyond",
        "es": "OScaR: The Occam's Razor for Extreme KV Cache Quantization in LLMs and Beyond"
      },
      "summary_i18n": {
        "en": "OScaR is a novel KV cache compression framework that addresses token norm imbalance through canalized rotation and omni-token scaling, achieving significant improvements in memory efficiency and decoding speed for extended context language models.",
        "zh-CN": "OScaR通过通道旋转和全令牌缩放解决token范数不平衡，显著提升长上下文语言模型的内存效率和解码速度。",
        "ja": "OScaRは、通路回転とオムニトークンスケーリングを通じてトークンノーマルの不均衡に対処する新しいKVキャッシュ圧縮フレームワークであり、メモリ効率とデコード速度を向上させる。",
        "ko": "OScaR은 채널화된 회전과 전토큰 스케일링을 통해 토큰 노름 불균형을 해결합니다.",
        "es": "OScaR es un marco de compresión de caché KV que resuelve desequilibrios de norma de token mediante rotación canalizada y escalado omnitoken, mejorando la eficiencia de memoria y velocidad de decodificación."
      }
    },
    {
      "arxivId": "2605.18233",
      "title": "Enhancing Train-Free Infinite-Frame Generation for Consistent Long Videos",
      "summary": "MIGA addresses long video generation challenges by reducing training-inference gaps and enhancing temporal consistency through dual consistency mechanisms.",
      "authors": [
        "X. Feng",
        "J. Zhu",
        "M. Wu",
        "C. Chen",
        "F. Mao",
        "H. Guo"
      ],
      "organization": {
        "_id": "64488b334988ee01f2a8d856",
        "name": "alibaba-inc",
        "fullname": "alibaba-inc",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/61ac8f8a00d01045fca0ad2f/MX4wxQVaFm1A1wqnrL2WU.jpeg"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 87,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18233.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18233",
      "pdfUrl": "https://arxiv.org/pdf/2605.18233.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18233",
      "githubRepo": null,
      "githubStars": null,
      "keywords": [
        "frame-level autoregressive frameworks",
        "FIFO-diffusion",
        "training-inference gap",
        "noise span",
        "self-reflection approach",
        "long-range frame guidance"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "提升无训练无限帧生成的长视频一致性",
      "summary_zh": "MIGA通过双一致性机制减少训练与推理差距，提升长视频时间一致性",
      "title_i18n": {
        "en": "Enhancing Train-Free Infinite-Frame Generation for Consistent Long Videos",
        "zh-CN": "提升无训练无限帧生成的长视频一致性",
        "ja": "一貫した長時間動画のためのトレインフリー無限フレーム生成の強化",
        "ko": "Enhancing Train-Free Infinite-Frame Generation for Consistent Long Videos",
        "es": "Enhancing Train-Free Infinite-Frame Generation for Consistent Long Videos"
      },
      "summary_i18n": {
        "en": "MIGA addresses long video generation challenges by reducing training-inference gaps and enhancing temporal consistency through dual consistency mechanisms.",
        "zh-CN": "MIGA通过双一致性机制减少训练与推理差距，提升长视频时间一致性",
        "ja": "MIGAは、二重の整合性メカニズムにより、トレーニングと推論のギャップを減らし、時間的整合性を向上させることで長時間動画生成の課題に対処する。",
        "ko": "MIGA는 이중 일관성 메커니즘을 통해 장비 영상 생성의 훈련-추론 차이를 줄입니다.",
        "es": "MIGA reduce la brecha entre entrenamiento e inferencia y mejora la consistencia temporal mediante mecanismos de consistencia dual."
      }
    },
    {
      "arxivId": "2605.22791",
      "title": "Gated DeltaNet-2: Decoupling Erase and Write in Linear Attention",
      "summary": "Gated DeltaNet-2 improves upon existing linear attention models by separating erase and write operations through distinct channel-wise gates, achieving superior performance in long-context language modeling and retrieval tasks.",
      "authors": [
        "Ali Hatamizadeh",
        "Yejin Choi",
        "Jan Kautz"
      ],
      "organization": {
        "_id": "60262b67268c201cdc8b7d43",
        "name": "nvidia",
        "fullname": "NVIDIA",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/65df9200dc3292a8983e5017/Vs5FPVCH-VZBipV3qKTuy.png"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 16,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.22791.png",
      "arxivUrl": "https://arxiv.org/abs/2605.22791",
      "pdfUrl": "https://arxiv.org/pdf/2605.22791.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.22791",
      "githubRepo": "https://github.com/NVlabs/GatedDeltaNet-2",
      "githubStars": 90,
      "keywords": [
        "linear attention",
        "softmax attention",
        "recurrent state",
        "delta-rule models",
        "Kimi Delta Attention",
        "Gated DeltaNet"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Gated DeltaNet-2：线性注意力中解耦擦除与写入",
      "summary_zh": "通过通道门控分离擦除与写入操作，提升长上下文语言建模与检索性能",
      "title_i18n": {
        "en": "Gated DeltaNet-2: Decoupling Erase and Write in Linear Attention",
        "zh-CN": "Gated DeltaNet-2：线性注意力中解耦擦除与写入",
        "ja": "Gated DeltaNet-2: 線形アテンションにおける消去と書き込みの分離",
        "ko": "Gated DeltaNet-2: Decoupling Erase and Write in Linear Attention",
        "es": "Gated DeltaNet-2: Decoupling Erase and Write in Linear Attention"
      },
      "summary_i18n": {
        "en": "Gated DeltaNet-2 improves upon existing linear attention models by separating erase and write operations through distinct channel-wise gates, achieving superior performance in long-context language modeling and retrieval tasks.",
        "zh-CN": "通过通道门控分离擦除与写入操作，提升长上下文语言建模与检索性能",
        "ja": "Gated DeltaNet-2は、別々のチャネルワイズゲートを通じて消去と書き込み操作を分離し、長文脈言語モデリングおよび検索タスクにおいて優れた性能を達成する。",
        "ko": "Gated DeltaNet-2는 별도의 채널별 게이트를 통해 삭제 및 쓰기 작업을 분리합니다.",
        "es": "Gated DeltaNet-2 mejora modelos de atención lineal separando operaciones de borrado y escritura con puertas canales distintas, logrando mejor rendimiento en tareas de modelado y recuperación de contexto largo."
      }
    },
    {
      "arxivId": "2605.22642",
      "title": "Spreadsheet-RL: Advancing Large Language Model Agents on Realistic Spreadsheet Tasks via Reinforcement Learning",
      "summary": "Spreadsheet-RL is a reinforcement learning framework that trains specialized spreadsheet agents in realistic Excel environments, improving AI agent performance on both general and domain-specific spreadsheet tasks through automated data collection and domain-specific benchmarks.",
      "authors": [
        "Banghao Chi",
        "Yining Xie",
        "Mingyuan Wu",
        "Jingcheng Yang",
        "Jize Jiang",
        "Zhaoheng Li"
      ],
      "organization": {
        "_id": "65448bef5b5d9185ba3202b9",
        "name": "UIUC-CS",
        "fullname": "University of Illinois at Urbana-Champaign",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/65448b21fcb96b8b48733729/ycqcXFayMTTD_KpE37067.jpeg"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 31,
      "comments": 3,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.22642.png",
      "arxivUrl": "https://arxiv.org/abs/2605.22642",
      "pdfUrl": "https://arxiv.org/pdf/2605.22642.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.22642",
      "githubRepo": "https://github.com/Spreadsheet-RL/Spreadsheet-RL",
      "githubStars": 6,
      "keywords": [
        "reinforcement learning",
        "fine-tuning",
        "spreadsheet agents",
        "Microsoft Excel",
        "automated pipeline",
        "domain-specific evaluation"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Spreadsheet-RL：通过强化学习提升大语言模型在真实表格任务中的表现",
      "summary_zh": "使用强化学习训练专用表格代理，提升AI在通用和领域特定表格任务中的性能。",
      "title_i18n": {
        "en": "Spreadsheet-RL: Advancing Large Language Model Agents on Realistic Spreadsheet Tasks via Reinforcement Learning",
        "zh-CN": "Spreadsheet-RL：通过强化学习提升大语言模型在真实表格任务中的表现",
        "ja": "Spreadsheet-RL: 実際のスプレッドシートタスクにおける大規模言語モデルエージェントの進歩",
        "ko": "Spreadsheet-RL: Advancing Large Language Model Agents on Realistic Spreadsheet Tasks via Reinforcement Learning",
        "es": "Spreadsheet-RL: Advancing Large Language Model Agents on Realistic Spreadsheet Tasks via Reinforcement Learning"
      },
      "summary_i18n": {
        "en": "Spreadsheet-RL is a reinforcement learning framework that trains specialized spreadsheet agents in realistic Excel environments, improving AI agent performance on both general and domain-specific spreadsheet tasks through automated data collection and domain-specific benchmarks.",
        "zh-CN": "使用强化学习训练专用表格代理，提升AI在通用和领域特定表格任务中的性能。",
        "ja": "Spreadsheet-RLは、自動データ収集とドメイン固有ベンチマークを通じて、一般的およびドメイン固有のスプレッドシートタスクにおけるAIエージェントのパフォーマンスを向上させる。",
        "ko": "Spreadsheet-RL은 실제 엑셀 환경에서 전문 스프레드시트 에이전트를 훈련시킵니다.",
        "es": "Spreadsheet-RL entrena agentes especializados en entornos Excel reales mediante aprendizaje por refuerzo, mejorando el rendimiento de agentes de IA en tareas de hojas de cálculo generales y específicas."
      }
    },
    {
      "arxivId": "2605.15980",
      "title": "Flash-GRPO: Efficient Alignment for Video Diffusion via One-Step Policy Optimization",
      "summary": "Flash-GRPO improves training efficiency for video diffusion models by addressing temporal variance and gradient inconsistency through iso-temporal grouping and temporal gradient rectification.",
      "authors": [
        "Xiaoxuan He",
        "Siming Fu",
        "Zeyue Xue",
        "Weijie Wang",
        "Ruizhe He",
        "Yuming Li"
      ],
      "organization": null,
      "publishedAt": "2026-05-15T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 35,
      "comments": 0,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.15980.png",
      "arxivUrl": "https://arxiv.org/abs/2605.15980",
      "pdfUrl": "https://arxiv.org/pdf/2605.15980.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.15980",
      "githubRepo": "https://github.com/Shredded-Pork/Flash-GRPO",
      "githubStars": 28,
      "keywords": [
        "Group Relative Policy Optimization",
        "video diffusion models",
        "parametered model",
        "sliding window subsampling",
        "full trajectory training",
        "single-step training framework"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "Flash-GRPO：通过单步策略优化的视频扩散高效对齐",
      "summary_zh": "使用时序分组和时序梯度校正提升视频扩散模型训练效率",
      "title_i18n": {
        "en": "Flash-GRPO: Efficient Alignment for Video Diffusion via One-Step Policy Optimization",
        "zh-CN": "Flash-GRPO：通过单步策略优化的视频扩散高效对齐",
        "ja": "Flash-GRPO: 1ステップ方針最適化によるビデオ拡散の効率的なアライメント",
        "ko": "Flash-GRPO: Efficient Alignment for Video Diffusion via One-Step Policy Optimization",
        "es": "Flash-GRPO: Efficient Alignment for Video Diffusion via One-Step Policy Optimization"
      },
      "summary_i18n": {
        "en": "Flash-GRPO improves training efficiency for video diffusion models by addressing temporal variance and gradient inconsistency through iso-temporal grouping and temporal gradient rectification.",
        "zh-CN": "使用时序分组和时序梯度校正提升视频扩散模型训练效率",
        "ja": "Flash-GRPOは、等時グループ化と時系列勾配修正を通じて、時間的変動と勾配の不一致を解決し、ビデオ拡散モデルのトレーニング効率を向上させる。",
        "ko": "Flash-GRPO는 동적 변동성과 기울기 불일치를 해결하여 비디오 확산 모델의 학습 효율성을 개선합니다.",
        "es": "Flash-GRPO mejora la eficiencia de entrenamiento de modelos de difusión de video al abordar variabilidad temporal y inconsistencia de gradientes mediante agrupación iso-temporal y rectificación temporal de gradientes."
      }
    },
    {
      "arxivId": "2605.22536",
      "title": "SpaceDG: Benchmarking Spatial Intelligence under Visual Degradation",
      "summary": "SpaceDG dataset and benchmark evaluate multimodal language models' spatial reasoning robustness under visual degradations, revealing significant performance gaps and demonstrating improved robustness through targeted training.",
      "authors": [
        "Xiaolong Zhou",
        "Yifei Liu",
        "Ziyang Gong",
        "Jiarui Li",
        "Qiyue Zhao",
        "Muyao Niu"
      ],
      "organization": {
        "_id": "6938f59934ae2fe5939d023c",
        "name": "Visionary-Laboratoary",
        "fullname": "Visionary-Laboratoary",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/6938f4de790b5cd0f6df6462/e5oOSNUpzMTOQislDkn9n.png"
      },
      "publishedAt": "2026-05-21T00:00:00.000Z",
      "submittedAt": "2026-05-22T00:00:00.000Z",
      "upvotes": 21,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.22536.png",
      "arxivUrl": "https://arxiv.org/abs/2605.22536",
      "pdfUrl": "https://arxiv.org/pdf/2605.22536.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.22536",
      "githubRepo": "https://github.com/Visionary-Laboratory/SpaceDG",
      "githubStars": 24,
      "keywords": [
        "Multimodal Large Language Models",
        "spatial intelligence",
        "degradation-aware spatial understanding",
        "3D Gaussian Splatting",
        "visual degradation",
        "VQA instances"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "SpaceDG：视觉退化下的空间智能基准测试",
      "summary_zh": "构建SpaceDG数据集和基准，评估多模态语言模型在视觉退化下的空间推理鲁棒性，发现性能差距并验证针对性训练的有效性。",
      "title_i18n": {
        "en": "SpaceDG: Benchmarking Spatial Intelligence under Visual Degradation",
        "zh-CN": "SpaceDG：视觉退化下的空间智能基准测试",
        "ja": "SpaceDG: 視覚劣化下での空間知能のベンチマーキング",
        "ko": "SpaceDG: Benchmarking Spatial Intelligence under Visual Degradation",
        "es": "SpaceDG: Benchmarking Spatial Intelligence under Visual Degradation"
      },
      "summary_i18n": {
        "en": "SpaceDG dataset and benchmark evaluate multimodal language models' spatial reasoning robustness under visual degradations, revealing significant performance gaps and demonstrating improved robustness through targeted training.",
        "zh-CN": "构建SpaceDG数据集和基准，评估多模态语言模型在视觉退化下的空间推理鲁棒性，发现性能差距并验证针对性训练的有效性。",
        "ja": "SpaceDGデータセットとベンチマークは、視覚劣化下でのマルチモーダル言語モデルの空間推論のロバスト性を評価し、大きなパフォーマンスギャップを明らかにし、対象的なトレーニングによりロバスト性を向上させる。",
        "ko": "SpaceDG 데이터세트와 벤치마크는 시각적 손상 하에서 공간 지능을 평가합니다.",
        "es": "El conjunto de datos SpaceDG y el benchmark evalúan la robustez del razonamiento espacial en modelos multivía bajo degradación visual, revelando brechas de rendimiento y demostrando mayor robustez mediante entrenamiento específico."
      }
    },
    {
      "arxivId": "2605.21468",
      "title": "You Only Need Minimal RLVR Training: Extrapolating LLMs via Rank-1 Trajectories",
      "summary": "Reinforcement learning with verifiable rewards parameter trajectories exhibit low-rank structures that enable efficient extrapolation through a simple linear regression method, demonstrating superior performance with reduced computational requirements.",
      "authors": [
        "Zhepei Wei",
        "Xinyu Zhu",
        "Wei-Lin Chen",
        "Chengsong Huang",
        "Jiaxin Huang",
        "Yu Meng"
      ],
      "organization": null,
      "publishedAt": "2026-05-20T00:00:00.000Z",
      "submittedAt": "2026-05-21T00:00:00.000Z",
      "upvotes": 44,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.21468.png",
      "arxivUrl": "https://arxiv.org/abs/2605.21468",
      "pdfUrl": "https://arxiv.org/pdf/2605.21468.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.21468",
      "githubRepo": "https://github.com/weizhepei/RELEX",
      "githubStars": 9,
      "keywords": [
        "reinforcement learning with verifiable rewards",
        "parameter trajectories",
        "low-rank approximation",
        "rank-1 approximation",
        "linear regression",
        "extrapolation"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "只需最小RLVR训练：通过秩1轨迹外推大语言模型",
      "summary_zh": "通过秩1轨迹实现大语言模型的高效外推，使用简单线性回归方法，性能优越且计算需求低",
      "title_i18n": {
        "en": "You Only Need Minimal RLVR Training: Extrapolating LLMs via Rank-1 Trajectories",
        "zh-CN": "只需最小RLVR训练：通过秩1轨迹外推大语言模型",
        "ja": "You Only Need Minimal RLVR Training: Rank-1トレースによるLLMsの外挿",
        "ko": "You Only Need Minimal RLVR Training: Extrapolating LLMs via Rank-1 Trajectories",
        "es": "You Only Need Minimal RLVR Training: Extrapolating LLMs via Rank-1 Trajectories",
        "de": "You Only Need Minimal RLVR Training: Extrapolating LLMs via Rank-1 Trajectories"
      },
      "summary_i18n": {
        "en": "Reinforcement learning with verifiable rewards parameter trajectories exhibit low-rank structures that enable efficient extrapolation through a simple linear regression method, demonstrating superior performance with reduced computational requirements.",
        "zh-CN": "通过秩1轨迹实现大语言模型的高效外推，使用简单线性回归方法，性能优越且计算需求低",
        "ja": "検証可能な報酬パラメータトレースを持つ強化学習は、低ランク構造を持ち、単純な線形回帰法を通じて効率的な外挿を可能にし、計算要件を削減して優れたパフォーマンスを示す。",
        "ko": "검증 가능한 보상 파라미터 트랙젝터리는 낮은 랭크 구조를 가지며, 간단한 선형 회귀로 효율적으로 외삽됩니다.",
        "es": "La combinación de aprendizaje por refuerzo con trayectorias de recompensas verificables muestra estructuras de rango uno, permitiendo extrapolación eficiente mediante regresión lineal simple, con mejor rendimiento y menor uso computacional.",
        "de": "Reinforcement Learning mit verifizierbaren Belohnungsparameter-Trajektorien zeigt niedrige Rangstruktur, die effiziente Extrapolation ermöglicht."
      }
    },
    {
      "arxivId": "2605.14333",
      "title": "InsightTok: Improving Text and Face Fidelity in Discrete Tokenization for Autoregressive Image Generation",
      "summary": "InsightTok improves discrete visual tokenization for better text and face reconstruction through content-aware perceptual losses, enhancing autoregressive image generation quality.",
      "authors": [
        "Yang Yue",
        "Fangyun Wei",
        "Tianyu He",
        "Jinjing Zhao",
        "Zanlin Ni",
        "Zeyu Liu"
      ],
      "organization": {
        "_id": "69719700e3846c07669d13ee",
        "name": "Tsinghua-LeapLab",
        "fullname": "Tsinghua-LeapLab",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/63987ffb2ceb55aabe0852f3/hflTWNTGxeJx83xNkYrDB.png"
      },
      "publishedAt": "2026-05-14T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 32,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.14333.png",
      "arxivUrl": "https://arxiv.org/abs/2605.14333",
      "pdfUrl": "https://arxiv.org/pdf/2605.14333.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.14333",
      "githubRepo": "https://github.com/LeapLabTHU/InsightTok",
      "githubStars": 33,
      "keywords": [
        "discrete visual tokenization",
        "autoregressive generators",
        "tokenizer",
        "discrete-tokenizer objectives",
        "text legibility",
        "facial fidelity"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "InsightTok：改进离散标记化中的文本和人脸保真度",
      "summary_zh": "通过内容感知的感知损失提升离散视觉标记化，改善自回归图像生成的文本和人脸重建质量。",
      "title_i18n": {
        "en": "InsightTok: Improving Text and Face Fidelity in Discrete Tokenization for Autoregressive Image Generation",
        "zh-CN": "InsightTok：改进离散标记化中的文本和人脸保真度",
        "ja": "InsightTok: Improving Text and Face Fidelity in Discrete Tokenization for Autoregressive Image Generation",
        "ko": "InsightTok: Improving Text and Face Fidelity in Discrete Tokenization for Autoregressive Image Generation",
        "es": "InsightTok: Improving Text and Face Fidelity in Discrete Tokenization for Autoregressive Image Generation",
        "de": "InsightTok: Improving Text and Face Fidelity in Discrete Tokenization for Autoregressive Image Generation"
      },
      "summary_i18n": {
        "en": "InsightTok improves discrete visual tokenization for better text and face reconstruction through content-aware perceptual losses, enhancing autoregressive image generation quality.",
        "zh-CN": "通过内容感知的感知损失提升离散视觉标记化，改善自回归图像生成的文本和人脸重建质量。",
        "ja": "InsightTokは、コンテンツに応じた知覚損失を通じて、テキストおよび顔の再構築を改善し、自己回帰画像生成の品質を向上させます。",
        "ko": "InsightTok은 내용 인식 망각 손실을 통해 텍스트 및 얼굴 재구성을 개선하여 자동 회귀 이미지 생성의 품질을 높입니다.",
        "es": "InsightTok mejora la tokenización visual discreta para una mejor reconstrucción de texto y rostros mediante pérdidas perceptuales conscientes del contenido, mejorando la calidad de generación de imágenes autoregresivas.",
        "de": "InsightTok verbessert die diskrete visuelle Tokenisierung für bessere Text- und Gesichtsrekonstruktion durch inhaltsbewusste perceptuelle Verluste, was die Qualität der autoregressiven Bildgenerierung erhöht."
      }
    },
    {
      "arxivId": "2605.14278",
      "title": "KVPO: ODE-Native GRPO for Autoregressive Video Alignment via KV Semantic Exploration",
      "summary": "ODENative online GRPO framework KVPO aligns streaming video generators with human preferences through causal-semantic exploration and velocity-field surrogate policy based on trajectory velocity energy.",
      "authors": [
        "Ruicheng Zhang",
        "Kaixi Cong",
        "Jun Zhou",
        "Zhizhou Zhong",
        "Zunnan Xu",
        "Shuiyang Mao"
      ],
      "organization": {
        "_id": "64cc8e9b214a472dd85e7e1d",
        "name": "THU1911",
        "fullname": "Tsinghua University",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/61f8e5934a8e5a275b2b3e5a/oKO6FK_rTzzPHXihicZou.jpeg"
      },
      "publishedAt": "2026-05-14T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 37,
      "comments": 2,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.14278.png",
      "arxivUrl": "https://arxiv.org/abs/2605.14278",
      "pdfUrl": "https://arxiv.org/pdf/2605.14278.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.14278",
      "githubRepo": "https://github.com/Richard-Zhang-AI/KVPO",
      "githubStars": 18,
      "keywords": [
        "streaming autoregressive video generators",
        "reinforcement learning",
        "noise-based exploration",
        "SDE-based surrogate policies",
        "ODE dynamics",
        "distilled AR models"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "KVPO：基于KV语义探索的ODE原生GRPO方法",
      "summary_zh": "提出KVPO方法，通过因果语义探索和速度场代理策略实现视频生成与人类偏好的对齐。",
      "title_i18n": {
        "en": "KVPO: ODE-Native GRPO for Autoregressive Video Alignment via KV Semantic Exploration",
        "zh-CN": "KVPO：基于KV语义探索的ODE原生GRPO方法",
        "ja": "KVPO: ODE-Native GRPO for Autoregressive Video Alignment via KV Semantic Exploration",
        "ko": "KVPO: ODE-Native GRPO for Autoregressive Video Alignment via KV Semantic Exploration",
        "es": "KVPO: ODE-Native GRPO for Autoregressive Video Alignment via KV Semantic Exploration",
        "de": "KVPO: ODE-Native GRPO for Autoregressive Video Alignment via KV Semantic Exploration"
      },
      "summary_i18n": {
        "en": "ODENative online GRPO framework KVPO aligns streaming video generators with human preferences through causal-semantic exploration and velocity-field surrogate policy based on trajectory velocity energy.",
        "zh-CN": "提出KVPO方法，通过因果语义探索和速度场代理策略实现视频生成与人类偏好的对齐。",
        "ja": "ODENativeオンラインGRPOフレームワークKVPOは、因果的意味探索と軌道速度エネルギーに基づく速度場代替ポリシーにより、動画ジェネレーターを人間の好みに合わせます。",
        "ko": "ODENative 온라인 GRPO 프레임워크 KVPO는 궤적 속도 에너지를 기반으로 인과-의미 탐색과 속도장 대체 정책을 통해 동영상 생성기를 인간의 선호에 맞춥니다.",
        "es": "El marco ODE-Native online GRPO KVPO alinea generadores de video en tiempo real con preferencias humanas mediante exploración causal-semántica y política de campo de velocidad basada en energía de trayectoria.",
        "de": "ODENative-Online-GRPO-Framework KVPO aligniert Streaming-Video-Generatoren mit menschlichen Präferenzen durch kausale-semantische Exploration und Geschwindigkeitsfeld-Surrogatpolitik basierend auf Trajektorien-Energie."
      }
    },
    {
      "arxivId": "2605.15256",
      "title": "ReactiveGWM: Steering NPC in Reactive Game World Models",
      "summary": "ReactiveGWM enables dynamic player-NPC interactions in game worlds by decoupling player controls from NPC behaviors through diffusion models with cross-attention modules for game-agnostic strategy transfer.",
      "authors": [
        "Zeqing Wang",
        "Danze Chen",
        "Zhaohu Xing",
        "Zizhao Tong",
        "Yinhan Zhang",
        "Xingyi Yang"
      ],
      "organization": null,
      "publishedAt": "2026-05-14T00:00:00.000Z",
      "submittedAt": "2026-05-18T00:00:00.000Z",
      "upvotes": 28,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.15256.png",
      "arxivUrl": "https://arxiv.org/abs/2605.15256",
      "pdfUrl": "https://arxiv.org/pdf/2605.15256.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.15256",
      "githubRepo": "https://github.com/INV-WZQ/ReactiveGWM",
      "githubStars": 46,
      "keywords": [
        "diffusion models",
        "cross-attention modules",
        "game-agnostic representation",
        "zero-shot strategy transfer",
        "player controls",
        "NPC behaviors"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "ReactiveGWM：在反应式游戏世界模型中引导NPC",
      "summary_zh": "通过扩散模型与交叉注意力模块解耦玩家控制与NPC行为，实现跨游戏策略迁移",
      "title_i18n": {
        "en": "ReactiveGWM: Steering NPC in Reactive Game World Models",
        "zh-CN": "ReactiveGWM：在反应式游戏世界模型中引导NPC",
        "ja": "ReactiveGWM: Steering NPC in Reactive Game World Models",
        "ko": "ReactiveGWM: Steering NPC in Reactive Game World Models",
        "es": "ReactiveGWM: Steering NPC in Reactive Game World Models",
        "de": "ReactiveGWM: Steering NPC in Reactive Game World Models"
      },
      "summary_i18n": {
        "en": "ReactiveGWM enables dynamic player-NPC interactions in game worlds by decoupling player controls from NPC behaviors through diffusion models with cross-attention modules for game-agnostic strategy transfer.",
        "zh-CN": "通过扩散模型与交叉注意力模块解耦玩家控制与NPC行为，实现跨游戏策略迁移",
        "ja": "ReactiveGWMは、ゲームに依存しない戦略転送のためのクロスアテンションモジュールを備えた拡散モデルを通じて、プレイヤーとNPCの相互作用を動的に可能にします。",
        "ko": "ReactiveGWM은 게임 세계 모델에서 플레이어-NPC 상호작용을 동적으로 가능하게 하며, 교차 주의 모듈을 사용하여 전략 전달을 게임에 관계없이 수행합니다.",
        "es": "ReactiveGWM permite interacciones dinámicas entre jugadores y personajes no jugables en mundos de juego al desacoplar controles de jugadores de comportamientos de NPCs mediante modelos de difusión con módulos de atención cruzada.",
        "de": "ReactiveGWM ermöglicht dynamische Spieler-NPC-Interaktionen in Spielweltmodellen durch Entkoppelung von Spielersteuerung und NPC-Verhalten mit Diffusionsmodellen und Kreuz-Attention-Modulen für strategische Übertragung."
      }
    },
    {
      "arxivId": "2605.18643",
      "title": "Post-Trained MoE Can Skip Half Experts via Self-Distillation",
      "summary": "Zero-Expert Self-Distillation Adaptation (ZEDA) enables efficient dynamic Mixture-of-Experts models by converting static models into adaptive ones with reduced computational costs and improved inference speed.",
      "authors": [
        "Xingtai Lv",
        "Li Sheng",
        "Kaiyan Zhang",
        "Yichen You",
        "Siyan Gao",
        "Xueheng Luo"
      ],
      "organization": {
        "_id": "64cc8e9b214a472dd85e7e1d",
        "name": "THU1911",
        "fullname": "Tsinghua University",
        "avatar": "https://cdn-avatars.huggingface.co/v1/production/uploads/61f8e5934a8e5a275b2b3e5a/oKO6FK_rTzzPHXihicZou.jpeg"
      },
      "publishedAt": "2026-05-18T00:00:00.000Z",
      "submittedAt": "2026-05-19T00:00:00.000Z",
      "upvotes": 29,
      "comments": 1,
      "thumbnail": "https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2605.18643.png",
      "arxivUrl": "https://arxiv.org/abs/2605.18643",
      "pdfUrl": "https://arxiv.org/pdf/2605.18643.pdf",
      "hfUrl": "https://huggingface.co/papers/2605.18643",
      "githubRepo": "https://github.com/TsinghuaC3I/ZEDA",
      "githubStars": 28,
      "keywords": [
        "Mixture-of-Experts",
        "sparse expert activation",
        "dynamic MoE",
        "self-distillation",
        "parameter-free zero-output experts",
        "group-level balancing loss"
      ],
      "source": "HF Daily Papers",
      "sourceType": "hf",
      "title_zh": "后训练MoE通过自蒸馏跳过一半专家",
      "summary_zh": "使用自蒸馏方法将静态MoE转换为动态模型，降低计算成本并提升推理速度",
      "title_i18n": {
        "en": "Post-Trained MoE Can Skip Half Experts via Self-Distillation",
        "zh-CN": "后训练MoE通过自蒸馏跳过一半专家",
        "ja": "Post-Trained MoE Can Skip Half Experts via Self-Distillation",
        "ko": "Post-Trained MoE Can Skip Half Experts via Self-Distillation",
        "es": "Post-Trained MoE Can Skip Half Experts via Self-Distillation",
        "de": "Post-Trained MoE Can Skip Half Experts via Self-Distillation"
      },
      "summary_i18n": {
        "en": "Zero-Expert Self-Distillation Adaptation (ZEDA) enables efficient dynamic Mixture-of-Experts models by converting static models into adaptive ones with reduced computational costs and improved inference speed.",
        "zh-CN": "使用自蒸馏方法将静态MoE转换为动态模型，降低计算成本并提升推理速度",
        "ja": "ゼロエクスパート自己教示適応（ZEDA）は、静的なモデルを適応可能なモデルに変換することで、計算コストを削減し、推論速度を向上させます。",
        "ko": "Zero-Expert Self-Distillation Adaptation (ZEDA)는 계산 비용을 줄이고 추론 속도를 개선하여 정적 모델을 적응형 모델로 변환합니다.",
        "es": "Zero-Expert Self-Distillation Adaptation (ZEDA) permite modelos eficientes Mixture-of-Experts dinámicos convirtiendo modelos estáticos en adaptativos con costos computacionales reducidos y mayor velocidad de inferencia.",
        "de": "Zero-Expert Self-Distillation Adaptation (ZEDA) ermöglicht effiziente dynamische Mixture-of-Experts-Modelle durch Umwandlung statischer Modelle in adaptive Modelle mit reduzierten Rechenkosten und verbesserter Inferenzgeschwindigkeit."
      }
    }
  ],
  "i18nUpdatedAt": "2026-05-23T05:45:53.929Z",
  "locales": [
    "en",
    "zh-CN",
    "ja",
    "ko",
    "es",
    "de"
  ]
}
