{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":689773665,"defaultBranch":"main","name":"llamafile","ownerLogin":"Mozilla-Ocho","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-09-10T21:12:32.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/117940224?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1724312794.0","currentOid":""},"activityList":{"items":[{"before":"ea39f2654f58036f1eafc7d6985cd8580e89c807","after":"66a84d8aea2990895fc4f64786406fea64e79197","ref":"refs/heads/main","pushedAt":"2024-08-31T23:01:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Improve the pool","shortMessageHtmlLink":"Improve the pool"}},{"before":"c7c4d65d8e4a95ed59e1aef000d897a073ea473b","after":"ea39f2654f58036f1eafc7d6985cd8580e89c807","ref":"refs/heads/main","pushedAt":"2024-08-31T06:20:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Upgrade to Cosmopolitan v3.8.0\n\nThe latest cosmocc allows us to use Clang for compilation which improves\nbuild latency for this project by 3x.","shortMessageHtmlLink":"Upgrade to Cosmopolitan v3.8.0"}},{"before":"d2be7328733703c90b83c34a03fb818a4cc31aa9","after":"c7c4d65d8e4a95ed59e1aef000d897a073ea473b","ref":"refs/heads/main","pushedAt":"2024-08-29T02:11:14.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Speed up KV in llamafile-bench","shortMessageHtmlLink":"Speed up KV in 
llamafile-bench"}},{"before":"8685f67095caa8615df92fa90572b4c414b97391","after":"d2be7328733703c90b83c34a03fb818a4cc31aa9","ref":"refs/heads/main","pushedAt":"2024-08-26T20:04:45.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Ignore --repeat-penalty in server","shortMessageHtmlLink":"Ignore --repeat-penalty in server"}},{"before":"2da8d803a521d5c9b15faa2f2fc0383572724b7d","after":"4d5d1854ca0cae70d62a275ad7e824bcfdf7151a","ref":"refs/heads/fp8","pushedAt":"2024-08-25T05:30:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add disclaimer to code","shortMessageHtmlLink":"Add disclaimer to code"}},{"before":"9662d430ef023b0e4c4c64e1e1db274ad43e693f","after":"2da8d803a521d5c9b15faa2f2fc0383572724b7d","ref":"refs/heads/fp8","pushedAt":"2024-08-25T05:02:05.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Improve FP8->BF16 code (121 tok/sec)","shortMessageHtmlLink":"Improve FP8->BF16 code (121 tok/sec)"}},{"before":"2c940da1a2dfcc6922d18648aac42f41da4eeb61","after":"8685f67095caa8615df92fa90572b4c414b97391","ref":"refs/heads/main","pushedAt":"2024-08-25T04:24:55.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix build","shortMessageHtmlLink":"Fix 
build"}},{"before":"98eff09ecc472f133c2c8cc6a6258f18899f37a0","after":"2c940da1a2dfcc6922d18648aac42f41da4eeb61","ref":"refs/heads/main","pushedAt":"2024-08-25T02:39:46.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Make replace_all() have linear complexity","shortMessageHtmlLink":"Make replace_all() have linear complexity"}},{"before":"c44664b4b251b65d03d9c47919e0fa6b9d63c520","after":"98eff09ecc472f133c2c8cc6a6258f18899f37a0","ref":"refs/heads/main","pushedAt":"2024-08-24T14:16:43.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Quantize TriLM models using Q2_K_S (#552)","shortMessageHtmlLink":"Quantize TriLM models using Q2_K_S (#552)"}},{"before":"53d1990a1616416c59ff92bb9838cca01182b881","after":"9662d430ef023b0e4c4c64e1e1db274ad43e693f","ref":"refs/heads/fp8","pushedAt":"2024-08-24T08:19:21.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix something","shortMessageHtmlLink":"Fix something"}},{"before":"1e608265334937ee790efb858be04c6b0f777bdb","after":"53d1990a1616416c59ff92bb9838cca01182b881","ref":"refs/heads/fp8","pushedAt":"2024-08-24T08:19:01.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix something","shortMessageHtmlLink":"Fix 
something"}},{"before":"2eac9ad8a8a74e3dd15d698ce6522a68fbe8b303","after":"1e608265334937ee790efb858be04c6b0f777bdb","ref":"refs/heads/fp8","pushedAt":"2024-08-24T07:32:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add AVX2 impl","shortMessageHtmlLink":"Add AVX2 impl"}},{"before":"b6a38c14bc3be79b3d7fa2fb48f2a008e69378f9","after":"2eac9ad8a8a74e3dd15d698ce6522a68fbe8b303","ref":"refs/heads/fp8","pushedAt":"2024-08-24T07:12:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Provide avx512f fallback","shortMessageHtmlLink":"Provide avx512f fallback"}},{"before":"78571d8b588417ecf45543cbebd2d25ad424c238","after":"b6a38c14bc3be79b3d7fa2fb48f2a008e69378f9","ref":"refs/heads/fp8","pushedAt":"2024-08-24T06:58:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Introduce better fp8 to fp32 conversion\n\nSee https://github.com/Mozilla-Ocho/llamafile/discussions/549","shortMessageHtmlLink":"Introduce better fp8 to fp32 conversion"}},{"before":"13d8706962496822bd3befb78a0e8638ee9c5ffc","after":"78571d8b588417ecf45543cbebd2d25ad424c238","ref":"refs/heads/fp8","pushedAt":"2024-08-24T06:21:05.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Introduce bf16 transcribing code","shortMessageHtmlLink":"Introduce bf16 transcribing 
code"}},{"before":"52d042ff0c2ad470ecdc73f8a8d4827afb22a086","after":"13d8706962496822bd3befb78a0e8638ee9c5ffc","ref":"refs/heads/fp8","pushedAt":"2024-08-24T05:30:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Introduce bf16 transcribing code","shortMessageHtmlLink":"Introduce bf16 transcribing code"}},{"before":"2e4de879166f61a2094481d677fee60d017a1dfd","after":"52d042ff0c2ad470ecdc73f8a8d4827afb22a086","ref":"refs/heads/fp8","pushedAt":"2024-08-24T04:29:53.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Boost fp8 prefill performance by 33 percent\n\n- F8 goes 127 tok/sec on my znver4 threadripper\n- F16 goes 258 tok/sec on my znver4 threadripper\n- BF16 goes 381 tok/sec on my znver4 threadripper","shortMessageHtmlLink":"Boost fp8 prefill performance by 33 percent"}},{"before":"ca063eaf4c38c3108fa21d69612b95f157821ecd","after":"2e4de879166f61a2094481d677fee60d017a1dfd","ref":"refs/heads/fp8","pushedAt":"2024-08-23T02:56:27.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Clean up fp8 code","shortMessageHtmlLink":"Clean up fp8 code"}},{"before":"c10a65c6e29f8355089be62f072c66397e0927e1","after":"ca063eaf4c38c3108fa21d69612b95f157821ecd","ref":"refs/heads/fp8","pushedAt":"2024-08-23T02:56:19.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Clean up fp8 code","shortMessageHtmlLink":"Clean up fp8 
code"}},{"before":"b81b5906686b94c70c1809eac6aada0c7469e8d9","after":"c10a65c6e29f8355089be62f072c66397e0927e1","ref":"refs/heads/fp8","pushedAt":"2024-08-23T02:03:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add vectorized fp8 code for avx512","shortMessageHtmlLink":"Add vectorized fp8 code for avx512"}},{"before":"42fa4226502132f456329d1b87a032fa82739375","after":"b81b5906686b94c70c1809eac6aada0c7469e8d9","ref":"refs/heads/fp8","pushedAt":"2024-08-22T18:20:21.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Address review comment","shortMessageHtmlLink":"Address review comment"}},{"before":null,"after":"42fa4226502132f456329d1b87a032fa82739375","ref":"refs/heads/fp8","pushedAt":"2024-08-22T07:46:34.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Add support for FP8 (E4M3)\n\nSee https://github.com/Mozilla-Ocho/llamafile/discussions/549","shortMessageHtmlLink":"Add support for FP8 (E4M3)"}},{"before":"2f1d558b14c5fef3f4cb43dee4e3fd6aebe95b07","after":"01cdfbd9cc2ff8842ae10020ee2b339e5bc58c9c","ref":"refs/heads/llama-matmul","pushedAt":"2024-08-22T00:38:22.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Try broken experiment with C","shortMessageHtmlLink":"Try broken experiment with 
C"}},{"before":"d6a505c03b7852e24b676dfd138afd9ebac3b6ad","after":"2f1d558b14c5fef3f4cb43dee4e3fd6aebe95b07","ref":"refs/heads/llama-matmul","pushedAt":"2024-08-21T20:39:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Try broken experiment with C","shortMessageHtmlLink":"Try broken experiment with C"}},{"before":"6287b601e50169c31a4aa45900ccd008ffda0e38","after":"c44664b4b251b65d03d9c47919e0fa6b9d63c520","ref":"refs/heads/main","pushedAt":"2024-08-21T03:41:33.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Always favor fp16 arithmetic in tinyBLAS\n\nIt was assumed earlier that upcasting would help precision. However this\nwasn't the case, according to levenshtein distance in whisperfile output\nwhich tells me this change makes things objectively better in noticeable\nways. So we now avoid the fp16 conversion, when the ISA is available. It\nshould be perfectly safe and accurate, even for large sums, since we now\nhave the ruler reduction divide and conquer approach, in tinyBLAS::gemm.","shortMessageHtmlLink":"Always favor fp16 arithmetic in tinyBLAS"}},{"before":"6230c840fe1517e820fdf99c57260274d1e10e27","after":"d6a505c03b7852e24b676dfd138afd9ebac3b6ad","ref":"refs/heads/llama-matmul","pushedAt":"2024-08-20T03:08:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Get new algorithm fully working\n\nI'm reasonably confident this implementation is solid. It now supports\nFP32, FP16, and BF16. I even found some new optimizations. Sadly it is\nstill going half the speed of tinyBLAS on my workstation. 
I tried with\nbig models and big prompts. It might just be that it's using AVX2, and\ntinyBLAS is using AVX512. Still, I would have liked to see the speedup\nthat the microbenchmarks reported. I wish I understood, why this isn't\nmeeting expectations so far.","shortMessageHtmlLink":"Get new algorithm fully working"}},{"before":"8a5a7242c51a2142a6e3a0522a52d50e0d3bc723","after":"6287b601e50169c31a4aa45900ccd008ffda0e38","ref":"refs/heads/main","pushedAt":"2024-08-19T23:18:45.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Update markdown documentation on audio conversion","shortMessageHtmlLink":"Update markdown documentation on audio conversion"}},{"before":"dc99002cb55abaddff39f24faccb6857c3bafc59","after":"8a5a7242c51a2142a6e3a0522a52d50e0d3bc723","ref":"refs/heads/main","pushedAt":"2024-08-19T23:13:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Improve whisperfile flag handling slightly","shortMessageHtmlLink":"Improve whisperfile flag handling slightly"}},{"before":"dab2317f6b2d64e6bf6b1e5d95fe102af1886d01","after":"dc99002cb55abaddff39f24faccb6857c3bafc59","ref":"refs/heads/main","pushedAt":"2024-08-19T23:11:51.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Convert audio files (mp3/flac/ogg) to 16khz wav\n\nIt's no longer necessary to run sox or ffmpeg beforehand, when using the\nwhisperfile command. 
If you're audio file isn't in the preferred format,\nit'll be converted for you automatically using the embedded audio tools.","shortMessageHtmlLink":"Convert audio files (mp3/flac/ogg) to 16khz wav"}},{"before":"c8b483f9912435967bb96dc643fb36f5a14d4456","after":"dab2317f6b2d64e6bf6b1e5d95fe102af1886d01","ref":"refs/heads/main","pushedAt":"2024-08-19T20:37:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jart","name":"Justine Tunney","path":"/jart","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/49262?s=80&v=4"},"commit":{"message":"Fix bug in whisperfile man page.","shortMessageHtmlLink":"Fix bug in whisperfile man page."}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEqZN1TQA","startCursor":null,"endCursor":null}},"title":"Activity · Mozilla-Ocho/llamafile"}