@calclavia
25Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.
steemit.com/@calclaviaVOTING POWER100.00%
DOWNVOTE POWER100.00%
RESOURCE CREDITS100.00%
REPUTATION PROGRESS0.00%
Net Worth
0.042USD
STEEM
0.001STEEM
SBD
0.012SBD
Effective Power
5.008SP
├── Own SP
0.629SP
└── Incoming Delegations
+4.379SP
Detailed Balance
| STEEM | ||
| balance | 0.001STEEM | STEEM |
| market_balance | 0.000STEEM | STEEM |
| savings_balance | 0.000STEEM | STEEM |
| reward_steem_balance | 0.000STEEM | STEEM |
| STEEM POWER | ||
| Own SP | 0.629SP | SP |
| Delegated Out | 0.000SP | SP |
| Delegation In | 4.379SP | SP |
| Effective Power | 5.008SP | SP |
| Reward SP (pending) | 0.003SP | SP |
| SBD | ||
| sbd_balance | 0.000SBD | SBD |
| sbd_conversions | 0.000SBD | SBD |
| sbd_market_balance | 0.000SBD | SBD |
| savings_sbd_balance | 0.000SBD | SBD |
| reward_sbd_balance | 0.012SBD | SBD |
{
"balance": "0.001 STEEM",
"savings_balance": "0.000 STEEM",
"reward_steem_balance": "0.000 STEEM",
"vesting_shares": "1023.117226 VESTS",
"delegated_vesting_shares": "0.000000 VESTS",
"received_vesting_shares": "7120.542580 VESTS",
"sbd_balance": "0.000 SBD",
"savings_sbd_balance": "0.000 SBD",
"reward_sbd_balance": "0.012 SBD",
"conversions": []
}Account Info
| name | calclavia |
| id | 692597 |
| rank | 615,408 |
| reputation | 63962625 |
| created | 2018-01-29T23:50:21 |
| recovery_account | steem |
| proxy | None |
| post_count | 2 |
| comment_count | 0 |
| lifetime_vote_count | 0 |
| witnesses_voted_for | 0 |
| last_post | 2018-01-30T00:19:15 |
| last_root_post | 2018-01-30T00:19:15 |
| last_vote_time | 2018-01-31T01:43:15 |
| proxied_vsf_votes | 0, 0, 0, 0 |
| can_vote | 1 |
| voting_power | 0 |
| delayed_votes | 0 |
| balance | 0.001 STEEM |
| savings_balance | 0.000 STEEM |
| sbd_balance | 0.000 SBD |
| savings_sbd_balance | 0.000 SBD |
| vesting_shares | 1023.117226 VESTS |
| delegated_vesting_shares | 0.000000 VESTS |
| received_vesting_shares | 7120.542580 VESTS |
| reward_vesting_balance | 6.136539 VESTS |
| vesting_balance | 0.000 STEEM |
| vesting_withdraw_rate | 0.000000 VESTS |
| next_vesting_withdrawal | 1969-12-31T23:59:59 |
| withdrawn | 0 |
| to_withdraw | 0 |
| withdraw_routes | 0 |
| savings_withdraw_requests | 0 |
| last_account_recovery | 1970-01-01T00:00:00 |
| reset_account | null |
| last_owner_update | 1970-01-01T00:00:00 |
| last_account_update | 2018-01-30T00:33:30 |
| mined | No |
| sbd_seconds | 0 |
| sbd_last_interest_payment | 1970-01-01T00:00:00 |
| savings_sbd_last_interest_payment | 1970-01-01T00:00:00 |
{
"id": 692597,
"name": "calclavia",
"owner": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",
1
]
]
},
"active": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",
1
]
]
},
"posting": {
"weight_threshold": 1,
"account_auths": [
[
"dtube.app",
1
]
],
"key_auths": [
[
"STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
1
]
]
},
"memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
"json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}",
"posting_json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}",
"proxy": "",
"last_owner_update": "1970-01-01T00:00:00",
"last_account_update": "2018-01-30T00:33:30",
"created": "2018-01-29T23:50:21",
"mined": false,
"recovery_account": "steem",
"last_account_recovery": "1970-01-01T00:00:00",
"reset_account": "null",
"comment_count": 0,
"lifetime_vote_count": 0,
"post_count": 2,
"can_vote": true,
"voting_manabar": {
"current_mana": "8143659806",
"last_update_time": 1779056778
},
"downvote_manabar": {
"current_mana": 2035914951,
"last_update_time": 1779056778
},
"voting_power": 0,
"balance": "0.001 STEEM",
"savings_balance": "0.000 STEEM",
"sbd_balance": "0.000 SBD",
"sbd_seconds": "0",
"sbd_seconds_last_update": "1970-01-01T00:00:00",
"sbd_last_interest_payment": "1970-01-01T00:00:00",
"savings_sbd_balance": "0.000 SBD",
"savings_sbd_seconds": "0",
"savings_sbd_seconds_last_update": "1970-01-01T00:00:00",
"savings_sbd_last_interest_payment": "1970-01-01T00:00:00",
"savings_withdraw_requests": 0,
"reward_sbd_balance": "0.012 SBD",
"reward_steem_balance": "0.000 STEEM",
"reward_vesting_balance": "6.136539 VESTS",
"reward_vesting_steem": "0.003 STEEM",
"vesting_shares": "1023.117226 VESTS",
"delegated_vesting_shares": "0.000000 VESTS",
"received_vesting_shares": "7120.542580 VESTS",
"vesting_withdraw_rate": "0.000000 VESTS",
"next_vesting_withdrawal": "1969-12-31T23:59:59",
"withdrawn": 0,
"to_withdraw": 0,
"withdraw_routes": 0,
"curation_rewards": 0,
"posting_rewards": 6,
"proxied_vsf_votes": [
0,
0,
0,
0
],
"witnesses_voted_for": 0,
"last_post": "2018-01-30T00:19:15",
"last_root_post": "2018-01-30T00:19:15",
"last_vote_time": "2018-01-31T01:43:15",
"post_bandwidth": 0,
"pending_claimed_accounts": 0,
"vesting_balance": "0.000 STEEM",
"reputation": 63962625,
"transfer_history": [],
"market_history": [],
"post_history": [],
"vote_history": [],
"other_history": [],
"witness_votes": [],
"tags_usage": [],
"guest_bloggers": [],
"rank": 615408
}Withdraw Routes
| Incoming | Outgoing |
|---|---|
Empty | Empty |
{
"incoming": [],
"outgoing": []
}From Date
To Date
steemdelegated 4.379 SP to @calclavia2026/05/17 22:26:18
steemdelegated 4.379 SP to @calclavia
2026/05/17 22:26:18
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 7120.542580 VESTS |
| Transaction Info | Block #106141280/Trx c12ec85524d57ce045b2b53e2d3b2ca961879844 |
View Raw JSON Data
{
"trx_id": "c12ec85524d57ce045b2b53e2d3b2ca961879844",
"block": 106141280,
"trx_in_block": 0,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-05-17T22:26:18",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "7120.542580 VESTS"
}
]
}steemdelegated 2.711 SP to @calclavia2026/05/11 20:36:45
steemdelegated 2.711 SP to @calclavia
2026/05/11 20:36:45
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 4408.332175 VESTS |
| Transaction Info | Block #105967061/Trx 2ae526e97f5aab89f5ab090883b2a81c2c26d4c4 |
View Raw JSON Data
{
"trx_id": "2ae526e97f5aab89f5ab090883b2a81c2c26d4c4",
"block": 105967061,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-05-11T20:36:45",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "4408.332175 VESTS"
}
]
}steemdelegated 4.386 SP to @calclavia2026/04/25 21:50:03
steemdelegated 4.386 SP to @calclavia
2026/04/25 21:50:03
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 7133.058336 VESTS |
| Transaction Info | Block #105508984/Trx bfe71fbd5f03781dcec33fb31b4580761626c775 |
View Raw JSON Data
{
"trx_id": "bfe71fbd5f03781dcec33fb31b4580761626c775",
"block": 105508984,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-04-25T21:50:03",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "7133.058336 VESTS"
}
]
}steemdelegated 2.736 SP to @calclavia2026/01/23 03:01:15
steemdelegated 2.736 SP to @calclavia
2026/01/23 03:01:15
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 4449.878994 VESTS |
| Transaction Info | Block #102846089/Trx aed9118bc6ef54bdbf9c09536d46d8b9ff2bb2c4 |
View Raw JSON Data
{
"trx_id": "aed9118bc6ef54bdbf9c09536d46d8b9ff2bb2c4",
"block": 102846089,
"trx_in_block": 0,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-01-23T03:01:15",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "4449.878994 VESTS"
}
]
}steemdelegated 2.837 SP to @calclavia2024/12/16 22:20:36
steemdelegated 2.837 SP to @calclavia
2024/12/16 22:20:36
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 4614.098191 VESTS |
| Transaction Info | Block #91292496/Trx cde2d6bb195d33b6bf2cd2128257e0c34e038341 |
View Raw JSON Data
{
"trx_id": "cde2d6bb195d33b6bf2cd2128257e0c34e038341",
"block": 91292496,
"trx_in_block": 4,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2024-12-16T22:20:36",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "4614.098191 VESTS"
}
]
}steemdelegated 2.941 SP to @calclavia2023/11/13 14:05:36
steemdelegated 2.941 SP to @calclavia
2023/11/13 14:05:36
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 4783.231723 VESTS |
| Transaction Info | Block #79846757/Trx 072f0719c62d1bf8abd308ddcbca538b95c55940 |
View Raw JSON Data
{
"trx_id": "072f0719c62d1bf8abd308ddcbca538b95c55940",
"block": 79846757,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2023-11-13T14:05:36",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "4783.231723 VESTS"
}
]
}steemdelegated 4.748 SP to @calclavia2023/09/21 19:43:21
steemdelegated 4.748 SP to @calclavia
2023/09/21 19:43:21
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 7720.510509 VESTS |
| Transaction Info | Block #78345313/Trx 8c9a5a09da1bd8a440fc027dc520771f5116cef2 |
View Raw JSON Data
{
"trx_id": "8c9a5a09da1bd8a440fc027dc520771f5116cef2",
"block": 78345313,
"trx_in_block": 6,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2023-09-21T19:43:21",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "7720.510509 VESTS"
}
]
}steemdelegated 4.884 SP to @calclavia2022/11/03 09:45:09
steemdelegated 4.884 SP to @calclavia
2022/11/03 09:45:09
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 7942.191947 VESTS |
| Transaction Info | Block #69110932/Trx 0b475fce1d5f86534a6aa4c64d431ccf02777444 |
View Raw JSON Data
{
"trx_id": "0b475fce1d5f86534a6aa4c64d431ccf02777444",
"block": 69110932,
"trx_in_block": 7,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2022-11-03T09:45:09",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "7942.191947 VESTS"
}
]
}steemdelegated 5.019 SP to @calclavia2022/01/17 09:10:39
steemdelegated 5.019 SP to @calclavia
2022/01/17 09:10:39
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 8162.725178 VESTS |
| Transaction Info | Block #60807293/Trx 3a77b7559471556f8b164b0c3b0ee44f300d9b85 |
View Raw JSON Data
{
"trx_id": "3a77b7559471556f8b164b0c3b0ee44f300d9b85",
"block": 60807293,
"trx_in_block": 3,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2022-01-17T09:10:39",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "8162.725178 VESTS"
}
]
}steemdelegated 5.132 SP to @calclavia2021/06/13 23:10:12
steemdelegated 5.132 SP to @calclavia
2021/06/13 23:10:12
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 8346.493836 VESTS |
| Transaction Info | Block #54605769/Trx 104a8f9c7314609e1d3f7f9d07dda7aa12e6d397 |
View Raw JSON Data
{
"trx_id": "104a8f9c7314609e1d3f7f9d07dda7aa12e6d397",
"block": 54605769,
"trx_in_block": 7,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2021-06-13T23:10:12",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "8346.493836 VESTS"
}
]
}steemdelegated 5.248 SP to @calclavia2020/12/11 09:31:24
steemdelegated 5.248 SP to @calclavia
2020/12/11 09:31:24
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 8533.915810 VESTS |
| Transaction Info | Block #49353292/Trx 1469c3393558c7c1e13519ab118d29e7864dd9da |
View Raw JSON Data
{
"trx_id": "1469c3393558c7c1e13519ab118d29e7864dd9da",
"block": 49353292,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-12-11T09:31:24",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "8533.915810 VESTS"
}
]
}steemdelegated 1.176 SP to @calclavia2020/12/06 03:08:51
steemdelegated 1.176 SP to @calclavia
2020/12/06 03:08:51
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 1912.543513 VESTS |
| Transaction Info | Block #49204861/Trx 2d2ba7bc5992321738df562fa3dd5a1a49d2e398 |
View Raw JSON Data
{
"trx_id": "2d2ba7bc5992321738df562fa3dd5a1a49d2e398",
"block": 49204861,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-12-06T03:08:51",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "1912.543513 VESTS"
}
]
}steemdelegated 5.252 SP to @calclavia2020/12/05 11:05:48
steemdelegated 5.252 SP to @calclavia
2020/12/05 11:05:48
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 8540.282449 VESTS |
| Transaction Info | Block #49185966/Trx a9977412ac6a8abe15cf0acd66b698ef180d64fd |
View Raw JSON Data
{
"trx_id": "a9977412ac6a8abe15cf0acd66b698ef180d64fd",
"block": 49185966,
"trx_in_block": 0,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-12-05T11:05:48",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "8540.282449 VESTS"
}
]
}steemdelegated 1.181 SP to @calclavia2020/11/02 12:07:27
steemdelegated 1.181 SP to @calclavia
2020/11/02 12:07:27
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 1920.017158 VESTS |
| Transaction Info | Block #48253665/Trx c0973de678815780534629c63076c5f3d08ee559 |
View Raw JSON Data
{
"trx_id": "c0973de678815780534629c63076c5f3d08ee559",
"block": 48253665,
"trx_in_block": 4,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-11-02T12:07:27",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "1920.017158 VESTS"
}
]
}steemdelegated 5.376 SP to @calclavia2020/05/09 04:04:12
steemdelegated 5.376 SP to @calclavia
2020/05/09 04:04:12
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 8742.929023 VESTS |
| Transaction Info | Block #43215081/Trx 1fc0a910736bbc5a2445134a11de4ca91f7f92f0 |
View Raw JSON Data
{
"trx_id": "1fc0a910736bbc5a2445134a11de4ca91f7f92f0",
"block": 43215081,
"trx_in_block": 24,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-05-09T04:04:12",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "8742.929023 VESTS"
}
]
}steemdelegated 1.201 SP to @calclavia2020/05/08 07:25:36
steemdelegated 1.201 SP to @calclavia
2020/05/08 07:25:36
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 1953.311140 VESTS |
| Transaction Info | Block #43190888/Trx d227c6ec662a7cb7fd6520d83189ff5882c8212a |
View Raw JSON Data
{
"trx_id": "d227c6ec662a7cb7fd6520d83189ff5882c8212a",
"block": 43190888,
"trx_in_block": 21,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-05-08T07:25:36",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "1953.311140 VESTS"
}
]
}steemdelegated 5.384 SP to @calclavia2020/04/15 20:33:39
steemdelegated 5.384 SP to @calclavia
2020/04/15 20:33:39
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 8755.906442 VESTS |
| Transaction Info | Block #42561335/Trx e30acc416155fd1efc718a0993c74df84f12f5ac |
View Raw JSON Data
{
"trx_id": "e30acc416155fd1efc718a0993c74df84f12f5ac",
"block": 42561335,
"trx_in_block": 0,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-04-15T20:33:39",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "8755.906442 VESTS"
}
]
}2020/01/30 05:33:36
2020/01/30 05:33:36
| parent author | calclavia |
| parent permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| author | steemitboard |
| permlink | steemitboard-notify-calclavia-20200130t053335000z |
| title | |
| body | Congratulations @calclavia! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table> <sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@calclavia) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=calclavia)_</sub> ###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes! |
| json metadata | {"image":["https://steemitboard.com/img/notify.png"]} |
| Transaction Info | Block #40372561/Trx 419e77adaa75cf3e5d1bcca4fb49be1378af24ab |
View Raw JSON Data
{
"trx_id": "419e77adaa75cf3e5d1bcca4fb49be1378af24ab",
"block": 40372561,
"trx_in_block": 7,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-01-30T05:33:36",
"op": [
"comment",
{
"parent_author": "calclavia",
"parent_permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"author": "steemitboard",
"permlink": "steemitboard-notify-calclavia-20200130t053335000z",
"title": "",
"body": "Congratulations @calclavia! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table>\n\n<sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@calclavia) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=calclavia)_</sub>\n\n\n###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!",
"json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
}
]
}dtubesent 0.001 STEEM to @calclavia- "Time is running out, claim your DTube account now before anyone else can! Login at https://d.tube"2019/08/22 15:39:03
dtubesent 0.001 STEEM to @calclavia- "Time is running out, claim your DTube account now before anyone else can! Login at https://d.tube"
2019/08/22 15:39:03
| from | dtube |
| to | calclavia |
| amount | 0.001 STEEM |
| memo | Time is running out, claim your DTube account now before anyone else can! Login at https://d.tube |
| Transaction Info | Block #35778792/Trx ba7040585d8625ab29cd97770034de0716723b7c |
View Raw JSON Data
{
"trx_id": "ba7040585d8625ab29cd97770034de0716723b7c",
"block": 35778792,
"trx_in_block": 7,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2019-08-22T15:39:03",
"op": [
"transfer",
{
"from": "dtube",
"to": "calclavia",
"amount": "0.001 STEEM",
"memo": "Time is running out, claim your DTube account now before anyone else can! Login at https://d.tube"
}
]
}steemdelegated 5.505 SP to @calclavia2019/05/12 13:48:18
steemdelegated 5.505 SP to @calclavia
2019/05/12 13:48:18
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 8951.529247 VESTS |
| Transaction Info | Block #32844169/Trx 925b8149fcdbdbab15806fcbd2d01aac257d8570 |
View Raw JSON Data
{
"trx_id": "925b8149fcdbdbab15806fcbd2d01aac257d8570",
"block": 32844169,
"trx_in_block": 4,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2019-05-12T13:48:18",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "8951.529247 VESTS"
}
]
}2019/01/30 04:46:06
2019/01/30 04:46:06
| parent author | calclavia |
| parent permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| author | steemitboard |
| permlink | steemitboard-notify-calclavia-20190130t044606000z |
| title | |
| body | Congratulations @calclavia! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday1.png</td><td>Happy Birthday! - You are on the Steem blockchain for 1 year!</td></tr></table> <sub>_[Click here to view your Board](https://steemitboard.com/@calclavia)_</sub> > Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**! |
| json metadata | {"image":["https://steemitboard.com/img/notify.png"]} |
| Transaction Info | Block #29899265/Trx d075eba6975568ff0e1acfbd9f21afc39572b9db |
View Raw JSON Data
{
"trx_id": "d075eba6975568ff0e1acfbd9f21afc39572b9db",
"block": 29899265,
"trx_in_block": 5,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2019-01-30T04:46:06",
"op": [
"comment",
{
"parent_author": "calclavia",
"parent_permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"author": "steemitboard",
"permlink": "steemitboard-notify-calclavia-20190130t044606000z",
"title": "",
"body": "Congratulations @calclavia! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday1.png</td><td>Happy Birthday! - You are on the Steem blockchain for 1 year!</td></tr></table>\n\n<sub>_[Click here to view your Board](https://steemitboard.com/@calclavia)_</sub>\n\n\n> Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**!",
"json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
}
]
}steemdelegated 5.627 SP to @calclavia2018/05/16 20:09:48
steemdelegated 5.627 SP to @calclavia
2018/05/16 20:09:48
| delegator | steem |
| delegatee | calclavia |
| vesting shares | 9151.081682 VESTS |
| Transaction Info | Block #22489697/Trx 004dc08b76aa69c88f10e61ba3d61ab67a630f4a |
View Raw JSON Data
{
"trx_id": "004dc08b76aa69c88f10e61ba3d61ab67a630f4a",
"block": 22489697,
"trx_in_block": 31,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-05-16T20:09:48",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "calclavia",
"vesting_shares": "9151.081682 VESTS"
}
]
}calclaviareceived 0.012 SBD, 0.004 SP author reward for @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic2018/02/06 00:19:15
calclaviareceived 0.012 SBD, 0.004 SP author reward for @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/02/06 00:19:15
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| sbd payout | 0.012 SBD |
| steem payout | 0.000 STEEM |
| vesting payout | 6.136539 VESTS |
| Transaction Info | Block #19617725/Virtual Operation #9 |
View Raw JSON Data
{
"trx_id": "0000000000000000000000000000000000000000",
"block": 19617725,
"trx_in_block": 4294967295,
"op_in_trx": 0,
"virtual_op": 9,
"timestamp": "2018-02-06T00:19:15",
"op": [
"author_reward",
{
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"sbd_payout": "0.012 SBD",
"steem_payout": "0.000 STEEM",
"vesting_payout": "6.136539 VESTS"
}
]
}calclaviaupvoted (100.00%) @traplord / the-trouble-with-virtual-teams2018/01/31 01:43:15
calclaviaupvoted (100.00%) @traplord / the-trouble-with-virtual-teams
2018/01/31 01:43:15
| voter | calclavia |
| author | traplord |
| permlink | the-trouble-with-virtual-teams |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19446871/Trx fd97a8d0299d20c4a1a1317522c0a6a034059e62 |
View Raw JSON Data
{
"trx_id": "fd97a8d0299d20c4a1a1317522c0a6a034059e62",
"block": 19446871,
"trx_in_block": 29,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-31T01:43:15",
"op": [
"vote",
{
"voter": "calclavia",
"author": "traplord",
"permlink": "the-trouble-with-virtual-teams",
"weight": 10000
}
]
}calclaviaupvoted (100.00%) @traplord / the-importance-of-steemit-and-the-falling-price-of-bitcoin2018/01/31 01:42:12
calclaviaupvoted (100.00%) @traplord / the-importance-of-steemit-and-the-falling-price-of-bitcoin
2018/01/31 01:42:12
| voter | calclavia |
| author | traplord |
| permlink | the-importance-of-steemit-and-the-falling-price-of-bitcoin |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19446850/Trx bbb65d0b71edcffa58a3a03255113eabaa1ac503 |
View Raw JSON Data
{
"trx_id": "bbb65d0b71edcffa58a3a03255113eabaa1ac503",
"block": 19446850,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-31T01:42:12",
"op": [
"vote",
{
"voter": "calclavia",
"author": "traplord",
"permlink": "the-importance-of-steemit-and-the-falling-price-of-bitcoin",
"weight": 10000
}
]
}konchozzzupvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 09:52:03
konchozzzupvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 09:52:03
| voter | konchozzz |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19427872/Trx 82e619d85019ee5f0399183700f1ad7c05430609 |
View Raw JSON Data
{
"trx_id": "82e619d85019ee5f0399183700f1ad7c05430609",
"block": 19427872,
"trx_in_block": 35,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T09:52:03",
"op": [
"vote",
{
"voter": "konchozzz",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"weight": 10000
}
]
}2018/01/30 00:33:36
2018/01/30 00:33:36
| required auths | [] |
| required posting auths | ["calclavia"] |
| id | follow |
| json | ["follow",{"follower":"calclavia","following":"dtube","what":["blog"]}] |
| Transaction Info | Block #19416718/Trx 09695b4f9a2278d3742bbb821f0b05e63918cb88 |
View Raw JSON Data
{
"trx_id": "09695b4f9a2278d3742bbb821f0b05e63918cb88",
"block": 19416718,
"trx_in_block": 13,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:33:36",
"op": [
"custom_json",
{
"required_auths": [],
"required_posting_auths": [
"calclavia"
],
"id": "follow",
"json": "[\"follow\",{\"follower\":\"calclavia\",\"following\":\"dtube\",\"what\":[\"blog\"]}]"
}
]
}calclaviaupdated their account properties2018/01/30 00:33:30
calclaviaupdated their account properties
2018/01/30 00:33:30
| account | calclavia |
| posting | {"weight_threshold":1,"account_auths":[["dtube.app",1]],"key_auths":[["STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",1]]} |
| memo key | STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3 |
| json metadata | {"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"}} |
| Transaction Info | Block #19416716/Trx ab1644a1290abf4d20a0444c8136efae14bc7a9b |
View Raw JSON Data
{
"trx_id": "ab1644a1290abf4d20a0444c8136efae14bc7a9b",
"block": 19416716,
"trx_in_block": 9,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:33:30",
"op": [
"account_update",
{
"account": "calclavia",
"posting": {
"weight_threshold": 1,
"account_auths": [
[
"dtube.app",
1
]
],
"key_auths": [
[
"STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
1
]
]
},
"memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
"json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}"
}
]
}calclaviaupvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 00:31:12
calclaviaupvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 00:31:12
| voter | calclavia |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19416670/Trx a714d4ae44a327df60ad033553ec51cb31a17f4e |
View Raw JSON Data
{
"trx_id": "a714d4ae44a327df60ad033553ec51cb31a17f4e",
"block": 19416670,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:31:12",
"op": [
"vote",
{
"voter": "calclavia",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"weight": 10000
}
]
}calclaviaupvoted (100.00%) @midasexpo / my-best-deep-dream-creations-so-far2018/01/30 00:30:42
calclaviaupvoted (100.00%) @midasexpo / my-best-deep-dream-creations-so-far
2018/01/30 00:30:42
| voter | calclavia |
| author | midasexpo |
| permlink | my-best-deep-dream-creations-so-far |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19416660/Trx 5d643ebb6f5eb5f4d53fcb4d18a0ae82c84dd393 |
View Raw JSON Data
{
"trx_id": "5d643ebb6f5eb5f4d53fcb4d18a0ae82c84dd393",
"block": 19416660,
"trx_in_block": 2,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:30:42",
"op": [
"vote",
{
"voter": "calclavia",
"author": "midasexpo",
"permlink": "my-best-deep-dream-creations-so-far",
"weight": 10000
}
]
}calclaviapublished a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 00:25:18
calclaviapublished a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 00:25:18
| parent author | |
| parent permlink | a3c |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| title | Reinforcement Learning using Asynchronous Advantage Actor Critic |
| body | Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/). # Actor Critic Models Before we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards. <center>  </center> The AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf). 
## Objective Functions Looking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action _a_ given state _s_. We arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_). > Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error) > Policy Loss: _L = -log(π(a | s)) * A(s)_ But not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation. > _H(π) = - Σ(P(x) log(P(x)))_ Entropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima. > Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_ When we combine the two loss functions, we get the loss function for the model overall: > _L = 0.5 * Σ(R - V(s))² - log(π(a | s)) * A(s) - β*H(π)_ Notice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent! # Asynchronous The interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop. https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png A3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain number of actions have been taken. The individual agents sync their weights with the master network after every gradient update. However, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously. That’s it for a high-level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [GitHub repository](https://github.com/calclavia/rl). I’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_. Feel free to leave me suggestions or ask questions in the comments section! |
| json metadata | {"tags":["ai","deep","reinforcement","learning","a3c"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #19416552/Trx 615b591bc2de22b1042fcc88c470dab5b9a14e65 |
View Raw JSON Data
{
"trx_id": "615b591bc2de22b1042fcc88c470dab5b9a14e65",
"block": 19416552,
"trx_in_block": 25,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:25:18",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "a3c",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
"body": "Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/).\n\n# Actor Critic Models\nBefore we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards.\n\n<center>\n\n</center>\n\nThe AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf).\n\n## Objective Functions\nLooking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action a given state _s_.\n\nWe arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_).\n\n> Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error)\n> Policy Loss: _L = -log(π(a | s)) * A(s)_\n\nBut not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation.\n\n> _H(π) = - Σ(P(x) log(P(x))_\n\nEntropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima\n\n> Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_\n\nWhen we combine the two loss functions, we get the loss function for the model overall:\n\n> _L = 0.5 * Σ(R — V(s))² - log(π(a | s)) * A(s) - β*H(π)_\n\nNotice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent!\n\n# Asynchronous\nThe interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop.\n\nhttps://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\n\nA3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update.\n\nHowever, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously.\n\nThat’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl).\n\nI’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!",
"json_metadata": "{\"tags\":[\"ai\",\"deep\",\"reinforcement\",\"learning\",\"a3c\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}calclaviapublished a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 00:24:57
calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 00:24:57
| parent author | |
| parent permlink | a3c |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| title | Reinforcement Learning using Asynchronous Advantage Actor Critic |
| body | Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/). # Actor Critic Models Before we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards. <center>  </center> The AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf). 
## Objective Functions Looking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action _a_ given state _s_. We arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_). > Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error) > Policy Loss: _L = -log(π(a | s)) * A(s)_ But not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation. > _H(π) = - Σ P(x) log(P(x))_ Entropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima. > Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_ When we combine the two loss functions, we get the loss function for the model overall: > _L = 0.5 * Σ(R - V(s))² - log(π(a | s)) * A(s) - β*H(π)_ Notice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent! # Asynchronous The interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop. https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png A3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain number of actions have been taken. The individual agents sync their weights with the master network after every gradient update. However, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously. That’s it for a high-level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [GitHub repository](https://github.com/calclavia/rl). I’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_. Feel free to leave me suggestions or ask questions in the comments section! |
| json metadata | {"tags":["ai","deep","reinforcement","learning","a3c"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #19416545/Trx d345fb5b8b99202bce2de759a5ba565831415e67 |
View Raw JSON Data
{
"trx_id": "d345fb5b8b99202bce2de759a5ba565831415e67",
"block": 19416545,
"trx_in_block": 9,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:24:57",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "a3c",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
"body": "Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/).\n\n# Actor Critic Models\nBefore we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards.\n\n<center>\n\n</center>\n\nThe AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf).\n\n## Objective Functions\nLooking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action a given state _s_.\n\nWe arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_).\n\n> Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error)\n> Policy Loss: _L = -log(π(a | s)) * A(s)_\n\nBut not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation.\n\n> _H(π) = - Σ(P(x) log(P(x))_\n\nEntropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima\n\n> Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_\n\nWhen we combine the two loss functions, we get the loss function for the model overall:\n\n> _L = 0.5 * Σ(R — V(s))² - log(π(a | s)) * A(s) - β*H(π)_\n\nNotice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent!\n\n# Asynchronous\nThe interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop.\n\nhttps://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\n\nA3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update.\n\nHowever, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously.\n\nThat’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl).\n\nI’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!",
"json_metadata": "{\"tags\":[\"ai\",\"deep\",\"reinforcement\",\"learning\",\"a3c\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}traplordupvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 00:24:33
traplord upvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 00:24:33
| voter | traplord |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19416537/Trx 81c2b4875170e31c553062ea7be806ec707b10ab |
View Raw JSON Data
{
"trx_id": "81c2b4875170e31c553062ea7be806ec707b10ab",
"block": 19416537,
"trx_in_block": 17,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:24:33",
"op": [
"vote",
{
"voter": "traplord",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"weight": 10000
}
]
}calclaviaupvoted (100.00%) @traplord / introductory-post2018/01/30 00:24:12
calclavia upvoted (100.00%) @traplord / introductory-post
2018/01/30 00:24:12
| voter | calclavia |
| author | traplord |
| permlink | introductory-post |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19416530/Trx fca0fcb50c522df7c410a3f310c5e9e1371f1ea6 |
View Raw JSON Data
{
"trx_id": "fca0fcb50c522df7c410a3f310c5e9e1371f1ea6",
"block": 19416530,
"trx_in_block": 33,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:24:12",
"op": [
"vote",
{
"voter": "calclavia",
"author": "traplord",
"permlink": "introductory-post",
"weight": 10000
}
]
}calclaviaupdated their account properties2018/01/30 00:23:39
calclavia updated their account properties
2018/01/30 00:23:39
| account | calclavia |
| memo key | STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3 |
| json metadata | {"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"}} |
| Transaction Info | Block #19416519/Trx 265054c97caa088cf91b5c9eb357a1d1bf6eabf4 |
View Raw JSON Data
{
"trx_id": "265054c97caa088cf91b5c9eb357a1d1bf6eabf4",
"block": 19416519,
"trx_in_block": 51,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:23:39",
"op": [
"account_update",
{
"account": "calclavia",
"memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
"json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}"
}
]
}calclaviapublished a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 00:21:12
calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 00:21:12
| parent author | |
| parent permlink | a3c |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| title | Reinforcement Learning using Asynchronous Advantage Actor Critic |
| body | @@ -151,16 +151,30 @@ fective +reinforcement learning |
| json metadata | {"tags":["a3c","deep","reinforcement","learning","algorithm"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #19416470/Trx 33db005546e4d17af9b2eabc1f3b8dc9e4beb4c5 |
View Raw JSON Data
{
"trx_id": "33db005546e4d17af9b2eabc1f3b8dc9e4beb4c5",
"block": 19416470,
"trx_in_block": 19,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:21:12",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "a3c",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
"body": "@@ -151,16 +151,30 @@\n fective \n+reinforcement \n learning\n",
"json_metadata": "{\"tags\":[\"a3c\",\"deep\",\"reinforcement\",\"learning\",\"algorithm\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}calclaviapublished a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 00:20:21
calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 00:20:21
| parent author | |
| parent permlink | a3c |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| title | Reinforcement Learning using Asynchronous Advantage Actor Critic |
| body | @@ -4195,16 +4195,86 @@ aptop.%0A%0A +https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png%0A%0A A3C work |
| json metadata | {"tags":["a3c","deep","reinforcement","learning","algorithm"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #19416453/Trx 2939757efc851e5b834c8480869c2a0b9e6d2591 |
View Raw JSON Data
{
"trx_id": "2939757efc851e5b834c8480869c2a0b9e6d2591",
"block": 19416453,
"trx_in_block": 4,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:20:21",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "a3c",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
"body": "@@ -4195,16 +4195,86 @@\n aptop.%0A%0A\n+https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png%0A%0A\n A3C work\n",
"json_metadata": "{\"tags\":[\"a3c\",\"deep\",\"reinforcement\",\"learning\",\"algorithm\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}2018/01/30 00:19:33
2018/01/30 00:19:33
| parent author | calclavia |
| parent permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| author | cheetah |
| permlink | cheetah-re-calclaviareinforcement-learning-using-asynchronous-advantage-actor-critic |
| title | |
| body | Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://medium.com/@henrymao/reinforcement-learning-using-asynchronous-advantage-actor-critic-704147f91686 |
| json metadata | |
| Transaction Info | Block #19416437/Trx 221fb273c9576dfe229528bb71ac69f81d9937b8 |
View Raw JSON Data
{
"trx_id": "221fb273c9576dfe229528bb71ac69f81d9937b8",
"block": 19416437,
"trx_in_block": 40,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:19:33",
"op": [
"comment",
{
"parent_author": "calclavia",
"parent_permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"author": "cheetah",
"permlink": "cheetah-re-calclaviareinforcement-learning-using-asynchronous-advantage-actor-critic",
"title": "",
"body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://medium.com/@henrymao/reinforcement-learning-using-asynchronous-advantage-actor-critic-704147f91686",
"json_metadata": ""
}
]
}2018/01/30 00:19:30
2018/01/30 00:19:30
| voter | cheetah |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| weight | 8 (0.08%) |
| Transaction Info | Block #19416436/Trx 815cd54349bd4651bb4c949b87a5b9ab6f0465e8 |
View Raw JSON Data
{
"trx_id": "815cd54349bd4651bb4c949b87a5b9ab6f0465e8",
"block": 19416436,
"trx_in_block": 9,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:19:30",
"op": [
"vote",
{
"voter": "cheetah",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"weight": 8
}
]
}calclaviapublished a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic2018/01/30 00:19:15
calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic
2018/01/30 00:19:15
| parent author | |
| parent permlink | a3c |
| author | calclavia |
| permlink | reinforcement-learning-using-asynchronous-advantage-actor-critic |
| title | Reinforcement Learning using Asynchronous Advantage Actor Critic |
| body | Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/). # Actor Critic Models Before we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards. <center>  </center> The AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf). 
## Objective Functions Looking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action _a_ given state _s_. We arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_). > Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error) > Policy Loss: _L = -log(π(a | s)) * A(s)_ But not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation. > _H(π) = - Σ(P(x) log(P(x)))_ Entropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima. > Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_ When we combine the two loss functions, we get the loss function for the model overall: > _L = 0.5 * Σ(R - V(s))² - log(π(a | s)) * A(s) - β*H(π)_ Notice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent! # Asynchronous The interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop. A3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain number of actions have been taken. The individual agents sync their weights with the master network after every gradient update. However, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously. That’s it for a high-level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [GitHub repository](https://github.com/calclavia/rl). I’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_. Feel free to leave me suggestions or ask questions in the comments section! |
| json metadata | {"tags":["a3c","deep","reinforcement","learning","algorithm"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #19416431/Trx ce5304208a8cfb6b51963917056ab75856c9df40 |
View Raw JSON Data
{
"trx_id": "ce5304208a8cfb6b51963917056ab75856c9df40",
"block": 19416431,
"trx_in_block": 51,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:19:15",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "a3c",
"author": "calclavia",
"permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
"title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
"body": "Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/).\n\n# Actor Critic Models\nBefore we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards.\n\n<center>\n\n</center>\n\nThe AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf).\n\n## Objective Functions\nLooking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action _a_ given state _s_.\n\nWe arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_).\n\n> Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error)\n> Policy Loss: _L = -log(π(a | s)) * A(s)_\n\nBut not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation.\n\n> _H(π) = - Σ(P(x) log(P(x)))_\n\nEntropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima.\n\n> Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_\n\nWhen we combine the two loss functions, we get the loss function for the model overall:\n\n> _L = 0.5 * Σ(R - V(s))² - log(π(a | s)) * A(s) - β*H(π)_\n\nNotice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent!\n\n# Asynchronous\nThe interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop.\n\nA3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain number of actions have been taken. The individual agents sync their weights with the master network after every gradient update.\n\nHowever, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously.\n\nThat’s it for a high-level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [GitHub repository](https://github.com/calclavia/rl).\n\nI’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_. Feel free to leave me suggestions or ask questions in the comments section!",
"json_metadata": "{\"tags\":[\"a3c\",\"deep\",\"reinforcement\",\"learning\",\"algorithm\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}traplordupvoted (100.00%) @calclavia / re-traplord-introductory-post-20180129t235143776z2018/01/30 00:12:39
traplordupvoted (100.00%) @calclavia / re-traplord-introductory-post-20180129t235143776z
2018/01/30 00:12:39
| voter | traplord |
| author | calclavia |
| permlink | re-traplord-introductory-post-20180129t235143776z |
| weight | 10000 (100.00%) |
| Transaction Info | Block #19416299/Trx 23223b433aed9f9ddceb76b5838c2eec990eda82 |
View Raw JSON Data
{
"trx_id": "23223b433aed9f9ddceb76b5838c2eec990eda82",
"block": 19416299,
"trx_in_block": 41,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-30T00:12:39",
"op": [
"vote",
{
"voter": "traplord",
"author": "calclavia",
"permlink": "re-traplord-introductory-post-20180129t235143776z",
"weight": 10000
}
]
}calclaviaupdated their account properties2018/01/29 23:55:24
calclaviaupdated their account properties
2018/01/29 23:55:24
| account | calclavia |
| memo key | STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3 |
| json metadata | {"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"calclavia","about":"Entrepreneur, Software Engineer. Founder of Altum Inc and Calclavia. Pursuing machine learning and AI research.","location":"California","website":"https://calclavia.com"}} |
| Transaction Info | Block #19415954/Trx 83635a66a30eda7ffaef492bf3102e982ace75d8 |
View Raw JSON Data
{
"trx_id": "83635a66a30eda7ffaef492bf3102e982ace75d8",
"block": 19415954,
"trx_in_block": 2,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-29T23:55:24",
"op": [
"account_update",
{
"account": "calclavia",
"memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
"json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"calclavia\",\"about\":\"Entrepreneur, Software Engineer. Founder of Altum Inc and Calclavia. Pursuing machine learning and AI research.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}"
}
]
}calclaviaupdated their account properties2018/01/29 23:53:30
calclaviaupdated their account properties
2018/01/29 23:53:30
| account | calclavia |
| memo key | STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3 |
| json metadata | {"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX."}} |
| Transaction Info | Block #19415916/Trx 284f06e86f86e1d955f022239276b103d2278886 |
View Raw JSON Data
{
"trx_id": "284f06e86f86e1d955f022239276b103d2278886",
"block": 19415916,
"trx_in_block": 24,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-29T23:53:30",
"op": [
"account_update",
{
"account": "calclavia",
"memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
"json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\"}}"
}
]
}2018/01/29 23:51:45
2018/01/29 23:51:45
| parent author | traplord |
| parent permlink | introductory-post |
| author | calclavia |
| permlink | re-traplord-introductory-post-20180129t235143776z |
| title | |
| body | Glad you made it on the platform! |
| json metadata | {"tags":["introduceyourself"],"app":"steemit/0.1"} |
| Transaction Info | Block #19415881/Trx b1a258e63041261f707a2236d4ef3249c42ac677 |
View Raw JSON Data
{
"trx_id": "b1a258e63041261f707a2236d4ef3249c42ac677",
"block": 19415881,
"trx_in_block": 56,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-29T23:51:45",
"op": [
"comment",
{
"parent_author": "traplord",
"parent_permlink": "introductory-post",
"author": "calclavia",
"permlink": "re-traplord-introductory-post-20180129t235143776z",
"title": "",
"body": "Glad you made it on the platform!",
"json_metadata": "{\"tags\":[\"introduceyourself\"],\"app\":\"steemit/0.1\"}"
}
]
}2018/01/29 23:51:18
2018/01/29 23:51:18
| required auths | [] |
| required posting auths | ["calclavia"] |
| id | follow |
| json | ["follow",{"follower":"calclavia","following":"traplord","what":["blog"]}] |
| Transaction Info | Block #19415872/Trx 04a455b720a3263536fe2a182f8a537e4212e468 |
View Raw JSON Data
{
"trx_id": "04a455b720a3263536fe2a182f8a537e4212e468",
"block": 19415872,
"trx_in_block": 12,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-29T23:51:18",
"op": [
"custom_json",
{
"required_auths": [],
"required_posting_auths": [
"calclavia"
],
"id": "follow",
"json": "[\"follow\",{\"follower\":\"calclavia\",\"following\":\"traplord\",\"what\":[\"blog\"]}]"
}
]
}steemcreated a new account: @calclavia2018/01/29 23:50:21
steemcreated a new account: @calclavia
2018/01/29 23:50:21
| fee | 0.500 STEEM |
| delegation | 29700.000000 VESTS |
| creator | steem |
| new account name | calclavia |
| owner | {"weight_threshold":1,"account_auths":[],"key_auths":[["STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",1]]} |
| active | {"weight_threshold":1,"account_auths":[],"key_auths":[["STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",1]]} |
| posting | {"weight_threshold":1,"account_auths":[],"key_auths":[["STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",1]]} |
| memo key | STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3 |
| json metadata | |
| extensions | [] |
| Transaction Info | Block #19415854/Trx a7feebfd8c0f65193e891451143f612ddeb7a39b |
View Raw JSON Data
{
"trx_id": "a7feebfd8c0f65193e891451143f612ddeb7a39b",
"block": 19415854,
"trx_in_block": 23,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-01-29T23:50:21",
"op": [
"account_create_with_delegation",
{
"fee": "0.500 STEEM",
"delegation": "29700.000000 VESTS",
"creator": "steem",
"new_account_name": "calclavia",
"owner": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",
1
]
]
},
"active": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",
1
]
]
},
"posting": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
1
]
]
},
"memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
"json_metadata": "",
"extensions": []
}
]
}Manabar
Voting Power100.00%
Downvote Power100.00%
Resource Credits100.00%
Reputation Progress0.00%
{
"voting_manabar": {
"current_mana": "8143659806",
"last_update_time": 1779056778
},
"downvote_manabar": {
"current_mana": 2035914951,
"last_update_time": 1779056778
},
"rc_account": {
"account": "calclavia",
"rc_manabar": {
"current_mana": "10164408779",
"last_update_time": 1779056778
},
"max_rc_creation_adjustment": {
"amount": "2020748973",
"precision": 6,
"nai": "@@000000037"
},
"max_rc": "10164408779"
}
}Account Metadata
| POSTING JSON METADATA | |
| profile | {"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"} |
| JSON METADATA | |
| profile | {"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"} |
{
"posting_json_metadata": {
"profile": {
"profile_image": "https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.",
"name": "Henry",
"about": "Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.",
"location": "California",
"website": "https://calclavia.com"
}
},
"json_metadata": {
"profile": {
"profile_image": "https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.",
"name": "Henry",
"about": "Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.",
"location": "California",
"website": "https://calclavia.com"
}
}
}Auth Keys
Owner
Single Signature
Public Keys
STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX1/1
Active
Single Signature
Public Keys
STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K1/1
Posting
Single Signature
Public Keys
STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf1/1
App Permissions
@dtube.app1/1
Memo
STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3
{
"owner": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",
1
]
]
},
"active": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",
1
]
]
},
"posting": {
"weight_threshold": 1,
"account_auths": [
[
"dtube.app",
1
]
],
"key_auths": [
[
"STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
1
]
]
},
"memo": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3"
}Witness Votes
0 / 30
No active witness votes.
[]