calclavia Steem Profile | Ecosynthesizer

@calclavia

Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.

steemit.com/@calclavia

VOTING POWER 100.00%

DOWNVOTE POWER 100.00%

RESOURCE CREDITS 100.00%

REPUTATION PROGRESS 0.00%

Net Worth

0.042USD

STEEM

0.001STEEM

SBD

0.012SBD

Effective Power

5.008SP

├── Own SP

0.629SP

└── Incoming Delegations

+4.379SP

Detailed Balance

STEEM
balance	0.001STEEM	STEEM
market_balance	0.000STEEM	STEEM
savings_balance	0.000STEEM	STEEM
reward_steem_balance	0.000STEEM	STEEM
STEEM POWER
Own SP	0.629SP	SP
Delegated Out	0.000SP	SP
Delegation In	4.379SP	SP
Effective Power	5.008SP	SP
Reward SP (pending)	0.003SP	SP
SBD
sbd_balance	0.000SBD	SBD
sbd_conversions	0.000SBD	SBD
sbd_market_balance	0.000SBD	SBD
savings_sbd_balance	0.000SBD	SBD
reward_sbd_balance	0.012SBD	SBD

{
  "balance": "0.001 STEEM",
  "savings_balance": "0.000 STEEM",
  "reward_steem_balance": "0.000 STEEM",
  "vesting_shares": "1023.117226 VESTS",
  "delegated_vesting_shares": "0.000000 VESTS",
  "received_vesting_shares": "7120.542580 VESTS",
  "sbd_balance": "0.000 SBD",
  "savings_sbd_balance": "0.000 SBD",
  "reward_sbd_balance": "0.012 SBD",
  "conversions": []
}

Account Info

name	calclavia
id	692597
rank	615,408
reputation	63962625
created	2018-01-29T23:50:21
recovery_account	steem
proxy	None
post_count	2
comment_count	0
lifetime_vote_count	0
witnesses_voted_for	0
last_post	2018-01-30T00:19:15
last_root_post	2018-01-30T00:19:15
last_vote_time	2018-01-31T01:43:15
proxied_vsf_votes	0, 0, 0, 0
can_vote	1
voting_power	0
delayed_votes	0
balance	0.001 STEEM
savings_balance	0.000 STEEM
sbd_balance	0.000 SBD
savings_sbd_balance	0.000 SBD
vesting_shares	1023.117226 VESTS
delegated_vesting_shares	0.000000 VESTS
received_vesting_shares	7120.542580 VESTS
reward_vesting_balance	6.136539 VESTS
vesting_balance	0.000 STEEM
vesting_withdraw_rate	0.000000 VESTS
next_vesting_withdrawal	1969-12-31T23:59:59
withdrawn	0
to_withdraw	0
withdraw_routes	0
savings_withdraw_requests	0
last_account_recovery	1970-01-01T00:00:00
reset_account	null
last_owner_update	1970-01-01T00:00:00
last_account_update	2018-01-30T00:33:30
mined	No
sbd_seconds	0
sbd_last_interest_payment	1970-01-01T00:00:00
savings_sbd_last_interest_payment	1970-01-01T00:00:00

{
  "id": 692597,
  "name": "calclavia",
  "owner": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",
        1
      ]
    ]
  },
  "active": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",
        1
      ]
    ]
  },
  "posting": {
    "weight_threshold": 1,
    "account_auths": [
      [
        "dtube.app",
        1
      ]
    ],
    "key_auths": [
      [
        "STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
        1
      ]
    ]
  },
  "memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
  "json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}",
  "posting_json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}",
  "proxy": "",
  "last_owner_update": "1970-01-01T00:00:00",
  "last_account_update": "2018-01-30T00:33:30",
  "created": "2018-01-29T23:50:21",
  "mined": false,
  "recovery_account": "steem",
  "last_account_recovery": "1970-01-01T00:00:00",
  "reset_account": "null",
  "comment_count": 0,
  "lifetime_vote_count": 0,
  "post_count": 2,
  "can_vote": true,
  "voting_manabar": {
    "current_mana": "8143659806",
    "last_update_time": 1779056778
  },
  "downvote_manabar": {
    "current_mana": 2035914951,
    "last_update_time": 1779056778
  },
  "voting_power": 0,
  "balance": "0.001 STEEM",
  "savings_balance": "0.000 STEEM",
  "sbd_balance": "0.000 SBD",
  "sbd_seconds": "0",
  "sbd_seconds_last_update": "1970-01-01T00:00:00",
  "sbd_last_interest_payment": "1970-01-01T00:00:00",
  "savings_sbd_balance": "0.000 SBD",
  "savings_sbd_seconds": "0",
  "savings_sbd_seconds_last_update": "1970-01-01T00:00:00",
  "savings_sbd_last_interest_payment": "1970-01-01T00:00:00",
  "savings_withdraw_requests": 0,
  "reward_sbd_balance": "0.012 SBD",
  "reward_steem_balance": "0.000 STEEM",
  "reward_vesting_balance": "6.136539 VESTS",
  "reward_vesting_steem": "0.003 STEEM",
  "vesting_shares": "1023.117226 VESTS",
  "delegated_vesting_shares": "0.000000 VESTS",
  "received_vesting_shares": "7120.542580 VESTS",
  "vesting_withdraw_rate": "0.000000 VESTS",
  "next_vesting_withdrawal": "1969-12-31T23:59:59",
  "withdrawn": 0,
  "to_withdraw": 0,
  "withdraw_routes": 0,
  "curation_rewards": 0,
  "posting_rewards": 6,
  "proxied_vsf_votes": [
    0,
    0,
    0,
    0
  ],
  "witnesses_voted_for": 0,
  "last_post": "2018-01-30T00:19:15",
  "last_root_post": "2018-01-30T00:19:15",
  "last_vote_time": "2018-01-31T01:43:15",
  "post_bandwidth": 0,
  "pending_claimed_accounts": 0,
  "vesting_balance": "0.000 STEEM",
  "reputation": 63962625,
  "transfer_history": [],
  "market_history": [],
  "post_history": [],
  "vote_history": [],
  "other_history": [],
  "witness_votes": [],
  "tags_usage": [],
  "guest_bloggers": [],
  "rank": 615408
}

Withdraw Routes

Incoming	Outgoing
Empty	Empty

{
  "incoming": [],
  "outgoing": []
}

From Date

To Date

steem delegated 4.379 SP to @calclavia

2026/05/17 22:26:18 UTC

106,141,280|c12ec85

delegator	steem
delegatee	calclavia
vesting shares	7120.542580 VESTS
Transaction Info	Block #106141280/Trx c12ec85524d57ce045b2b53e2d3b2ca961879844

View Raw JSON Data

{
  "trx_id": "c12ec85524d57ce045b2b53e2d3b2ca961879844",
  "block": 106141280,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-05-17T22:26:18",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "7120.542580 VESTS"
    }
  ]
}

steem delegated 2.711 SP to @calclavia

2026/05/11 20:36:45 UTC

105,967,061|2ae526e

delegator	steem
delegatee	calclavia
vesting shares	4408.332175 VESTS
Transaction Info	Block #105967061/Trx 2ae526e97f5aab89f5ab090883b2a81c2c26d4c4

View Raw JSON Data

{
  "trx_id": "2ae526e97f5aab89f5ab090883b2a81c2c26d4c4",
  "block": 105967061,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-05-11T20:36:45",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "4408.332175 VESTS"
    }
  ]
}

steem delegated 4.386 SP to @calclavia

2026/04/25 21:50:03 UTC

105,508,984|bfe71fb

delegator	steem
delegatee	calclavia
vesting shares	7133.058336 VESTS
Transaction Info	Block #105508984/Trx bfe71fbd5f03781dcec33fb31b4580761626c775

View Raw JSON Data

{
  "trx_id": "bfe71fbd5f03781dcec33fb31b4580761626c775",
  "block": 105508984,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-04-25T21:50:03",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "7133.058336 VESTS"
    }
  ]
}

steem delegated 2.736 SP to @calclavia

2026/01/23 03:01:15 UTC

102,846,089|aed9118

delegator	steem
delegatee	calclavia
vesting shares	4449.878994 VESTS
Transaction Info	Block #102846089/Trx aed9118bc6ef54bdbf9c09536d46d8b9ff2bb2c4

View Raw JSON Data

{
  "trx_id": "aed9118bc6ef54bdbf9c09536d46d8b9ff2bb2c4",
  "block": 102846089,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-01-23T03:01:15",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "4449.878994 VESTS"
    }
  ]
}

steem delegated 2.837 SP to @calclavia

2024/12/16 22:20:36 UTC

91,292,496|cde2d6b

delegator	steem
delegatee	calclavia
vesting shares	4614.098191 VESTS
Transaction Info	Block #91292496/Trx cde2d6bb195d33b6bf2cd2128257e0c34e038341

View Raw JSON Data

{
  "trx_id": "cde2d6bb195d33b6bf2cd2128257e0c34e038341",
  "block": 91292496,
  "trx_in_block": 4,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2024-12-16T22:20:36",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "4614.098191 VESTS"
    }
  ]
}

steem delegated 2.941 SP to @calclavia

2023/11/13 14:05:36 UTC

79,846,757|072f071

delegator	steem
delegatee	calclavia
vesting shares	4783.231723 VESTS
Transaction Info	Block #79846757/Trx 072f0719c62d1bf8abd308ddcbca538b95c55940

View Raw JSON Data

{
  "trx_id": "072f0719c62d1bf8abd308ddcbca538b95c55940",
  "block": 79846757,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2023-11-13T14:05:36",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "4783.231723 VESTS"
    }
  ]
}

steem delegated 4.748 SP to @calclavia

2023/09/21 19:43:21 UTC

78,345,313|8c9a5a0

delegator	steem
delegatee	calclavia
vesting shares	7720.510509 VESTS
Transaction Info	Block #78345313/Trx 8c9a5a09da1bd8a440fc027dc520771f5116cef2

View Raw JSON Data

{
  "trx_id": "8c9a5a09da1bd8a440fc027dc520771f5116cef2",
  "block": 78345313,
  "trx_in_block": 6,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2023-09-21T19:43:21",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "7720.510509 VESTS"
    }
  ]
}

steem delegated 4.884 SP to @calclavia

2022/11/03 09:45:09 UTC

69,110,932|0b475fc

delegator	steem
delegatee	calclavia
vesting shares	7942.191947 VESTS
Transaction Info	Block #69110932/Trx 0b475fce1d5f86534a6aa4c64d431ccf02777444

View Raw JSON Data

{
  "trx_id": "0b475fce1d5f86534a6aa4c64d431ccf02777444",
  "block": 69110932,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2022-11-03T09:45:09",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "7942.191947 VESTS"
    }
  ]
}

steem delegated 5.019 SP to @calclavia

2022/01/17 09:10:39 UTC

60,807,293|3a77b75

delegator	steem
delegatee	calclavia
vesting shares	8162.725178 VESTS
Transaction Info	Block #60807293/Trx 3a77b7559471556f8b164b0c3b0ee44f300d9b85

View Raw JSON Data

{
  "trx_id": "3a77b7559471556f8b164b0c3b0ee44f300d9b85",
  "block": 60807293,
  "trx_in_block": 3,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2022-01-17T09:10:39",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "8162.725178 VESTS"
    }
  ]
}

steem delegated 5.132 SP to @calclavia

2021/06/13 23:10:12 UTC

54,605,769|104a8f9

delegator	steem
delegatee	calclavia
vesting shares	8346.493836 VESTS
Transaction Info	Block #54605769/Trx 104a8f9c7314609e1d3f7f9d07dda7aa12e6d397

View Raw JSON Data

{
  "trx_id": "104a8f9c7314609e1d3f7f9d07dda7aa12e6d397",
  "block": 54605769,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2021-06-13T23:10:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "8346.493836 VESTS"
    }
  ]
}

steem delegated 5.248 SP to @calclavia

2020/12/11 09:31:24 UTC

49,353,292|1469c33

delegator	steem
delegatee	calclavia
vesting shares	8533.915810 VESTS
Transaction Info	Block #49353292/Trx 1469c3393558c7c1e13519ab118d29e7864dd9da

View Raw JSON Data

{
  "trx_id": "1469c3393558c7c1e13519ab118d29e7864dd9da",
  "block": 49353292,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-11T09:31:24",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "8533.915810 VESTS"
    }
  ]
}

steem delegated 1.176 SP to @calclavia

2020/12/06 03:08:51 UTC

49,204,861|2d2ba7b

delegator	steem
delegatee	calclavia
vesting shares	1912.543513 VESTS
Transaction Info	Block #49204861/Trx 2d2ba7bc5992321738df562fa3dd5a1a49d2e398

View Raw JSON Data

{
  "trx_id": "2d2ba7bc5992321738df562fa3dd5a1a49d2e398",
  "block": 49204861,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-06T03:08:51",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "1912.543513 VESTS"
    }
  ]
}

steem delegated 5.252 SP to @calclavia

2020/12/05 11:05:48 UTC

49,185,966|a997741

delegator	steem
delegatee	calclavia
vesting shares	8540.282449 VESTS
Transaction Info	Block #49185966/Trx a9977412ac6a8abe15cf0acd66b698ef180d64fd

View Raw JSON Data

{
  "trx_id": "a9977412ac6a8abe15cf0acd66b698ef180d64fd",
  "block": 49185966,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-05T11:05:48",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "8540.282449 VESTS"
    }
  ]
}

steem delegated 1.181 SP to @calclavia

2020/11/02 12:07:27 UTC

48,253,665|c0973de

delegator	steem
delegatee	calclavia
vesting shares	1920.017158 VESTS
Transaction Info	Block #48253665/Trx c0973de678815780534629c63076c5f3d08ee559

View Raw JSON Data

{
  "trx_id": "c0973de678815780534629c63076c5f3d08ee559",
  "block": 48253665,
  "trx_in_block": 4,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-11-02T12:07:27",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "1920.017158 VESTS"
    }
  ]
}

steem delegated 5.376 SP to @calclavia

2020/05/09 04:04:12 UTC

43,215,081|1fc0a91

delegator	steem
delegatee	calclavia
vesting shares	8742.929023 VESTS
Transaction Info	Block #43215081/Trx 1fc0a910736bbc5a2445134a11de4ca91f7f92f0

View Raw JSON Data

{
  "trx_id": "1fc0a910736bbc5a2445134a11de4ca91f7f92f0",
  "block": 43215081,
  "trx_in_block": 24,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-05-09T04:04:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "8742.929023 VESTS"
    }
  ]
}

steem delegated 1.201 SP to @calclavia

2020/05/08 07:25:36 UTC

43,190,888|d227c6e

delegator	steem
delegatee	calclavia
vesting shares	1953.311140 VESTS
Transaction Info	Block #43190888/Trx d227c6ec662a7cb7fd6520d83189ff5882c8212a

View Raw JSON Data

{
  "trx_id": "d227c6ec662a7cb7fd6520d83189ff5882c8212a",
  "block": 43190888,
  "trx_in_block": 21,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-05-08T07:25:36",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "1953.311140 VESTS"
    }
  ]
}

steem delegated 5.384 SP to @calclavia

2020/04/15 20:33:39 UTC

42,561,335|e30acc4

delegator	steem
delegatee	calclavia
vesting shares	8755.906442 VESTS
Transaction Info	Block #42561335/Trx e30acc416155fd1efc718a0993c74df84f12f5ac

View Raw JSON Data

{
  "trx_id": "e30acc416155fd1efc718a0993c74df84f12f5ac",
  "block": 42561335,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-04-15T20:33:39",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "8755.906442 VESTS"
    }
  ]
}

steemitboard replied to @calclavia / steemitboard-notify-calclavia-20200130t053335000z

2020/01/30 05:33:36 UTC

40,372,561|419e77a

parent author	calclavia
parent permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
author	steemitboard
permlink	steemitboard-notify-calclavia-20200130t053335000z
title
body	Congratulations @calclavia! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table> <sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@calclavia) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=calclavia)_</sub> ###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!
json metadata	{"image":["https://steemitboard.com/img/notify.png"]}
Transaction Info	Block #40372561/Trx 419e77adaa75cf3e5d1bcca4fb49be1378af24ab

View Raw JSON Data

{
  "trx_id": "419e77adaa75cf3e5d1bcca4fb49be1378af24ab",
  "block": 40372561,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-01-30T05:33:36",
  "op": [
    "comment",
    {
      "parent_author": "calclavia",
      "parent_permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-calclavia-20200130t053335000z",
      "title": "",
      "body": "Congratulations @calclavia! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table>\n\n<sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@calclavia) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=calclavia)_</sub>\n\n\n###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
    }
  ]
}

dtube sent 0.001 STEEM to @calclavia - "Time is running out, claim your DTube account now before anyone else can! Login at https://d.tube"

2019/08/22 15:39:03 UTC

35,778,792|ba70405

from	dtube
to	calclavia
amount	0.001 STEEM
memo	Time is running out, claim your DTube account now before anyone else can! Login at https://d.tube
Transaction Info	Block #35778792/Trx ba7040585d8625ab29cd97770034de0716723b7c

View Raw JSON Data

{
  "trx_id": "ba7040585d8625ab29cd97770034de0716723b7c",
  "block": 35778792,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2019-08-22T15:39:03",
  "op": [
    "transfer",
    {
      "from": "dtube",
      "to": "calclavia",
      "amount": "0.001 STEEM",
      "memo": "Time is running out, claim your DTube account now before anyone else can! Login at https://d.tube"
    }
  ]
}

steem delegated 5.505 SP to @calclavia

2019/05/12 13:48:18 UTC

32,844,169|925b814

delegator	steem
delegatee	calclavia
vesting shares	8951.529247 VESTS
Transaction Info	Block #32844169/Trx 925b8149fcdbdbab15806fcbd2d01aac257d8570

View Raw JSON Data

{
  "trx_id": "925b8149fcdbdbab15806fcbd2d01aac257d8570",
  "block": 32844169,
  "trx_in_block": 4,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2019-05-12T13:48:18",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "8951.529247 VESTS"
    }
  ]
}

steemitboard replied to @calclavia / steemitboard-notify-calclavia-20190130t044606000z

2019/01/30 04:46:06 UTC

29,899,265|d075eba

parent author	calclavia
parent permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
author	steemitboard
permlink	steemitboard-notify-calclavia-20190130t044606000z
title
body	Congratulations @calclavia! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday1.png</td><td>Happy Birthday! - You are on the Steem blockchain for 1 year!</td></tr></table> <sub>_[Click here to view your Board](https://steemitboard.com/@calclavia)_</sub> > Support [SteemitBoard's project](https://steemit.com/@steemitboard)! [Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) and get one more award!
json metadata	{"image":["https://steemitboard.com/img/notify.png"]}
Transaction Info	Block #29899265/Trx d075eba6975568ff0e1acfbd9f21afc39572b9db

View Raw JSON Data

{
  "trx_id": "d075eba6975568ff0e1acfbd9f21afc39572b9db",
  "block": 29899265,
  "trx_in_block": 5,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2019-01-30T04:46:06",
  "op": [
    "comment",
    {
      "parent_author": "calclavia",
      "parent_permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-calclavia-20190130t044606000z",
      "title": "",
      "body": "Congratulations @calclavia! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@calclavia/birthday1.png</td><td>Happy Birthday! - You are on the Steem blockchain for 1 year!</td></tr></table>\n\n<sub>_[Click here to view your Board](https://steemitboard.com/@calclavia)_</sub>\n\n\n> Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
    }
  ]
}

steem delegated 5.627 SP to @calclavia

2018/05/16 20:09:48 UTC

22,489,697|004dc08

delegator	steem
delegatee	calclavia
vesting shares	9151.081682 VESTS
Transaction Info	Block #22489697/Trx 004dc08b76aa69c88f10e61ba3d61ab67a630f4a

View Raw JSON Data

{
  "trx_id": "004dc08b76aa69c88f10e61ba3d61ab67a630f4a",
  "block": 22489697,
  "trx_in_block": 31,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-05-16T20:09:48",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "calclavia",
      "vesting_shares": "9151.081682 VESTS"
    }
  ]
}

calclavia received 0.012 SBD, 0.004 SP author reward for @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/02/06 00:19:15 UTC

19,617,725|virtual

author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
sbd payout	0.012 SBD
steem payout	0.000 STEEM
vesting payout	6.136539 VESTS
Transaction Info	Block #19617725/Virtual Operation #9

View Raw JSON Data

{
  "trx_id": "0000000000000000000000000000000000000000",
  "block": 19617725,
  "trx_in_block": 4294967295,
  "op_in_trx": 0,
  "virtual_op": 9,
  "timestamp": "2018-02-06T00:19:15",
  "op": [
    "author_reward",
    {
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "sbd_payout": "0.012 SBD",
      "steem_payout": "0.000 STEEM",
      "vesting_payout": "6.136539 VESTS"
    }
  ]
}

calclavia upvoted (100.00%) @traplord / the-trouble-with-virtual-teams

2018/01/31 01:43:15 UTC

19,446,871|fd97a8d

voter	calclavia
author	traplord
permlink	the-trouble-with-virtual-teams
weight	10000 (100.00%)
Transaction Info	Block #19446871/Trx fd97a8d0299d20c4a1a1317522c0a6a034059e62

View Raw JSON Data

{
  "trx_id": "fd97a8d0299d20c4a1a1317522c0a6a034059e62",
  "block": 19446871,
  "trx_in_block": 29,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-31T01:43:15",
  "op": [
    "vote",
    {
      "voter": "calclavia",
      "author": "traplord",
      "permlink": "the-trouble-with-virtual-teams",
      "weight": 10000
    }
  ]
}

calclavia upvoted (100.00%) @traplord / the-importance-of-steemit-and-the-falling-price-of-bitcoin

2018/01/31 01:42:12 UTC

19,446,850|bbb65d0

voter	calclavia
author	traplord
permlink	the-importance-of-steemit-and-the-falling-price-of-bitcoin
weight	10000 (100.00%)
Transaction Info	Block #19446850/Trx bbb65d0b71edcffa58a3a03255113eabaa1ac503

View Raw JSON Data

{
  "trx_id": "bbb65d0b71edcffa58a3a03255113eabaa1ac503",
  "block": 19446850,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-31T01:42:12",
  "op": [
    "vote",
    {
      "voter": "calclavia",
      "author": "traplord",
      "permlink": "the-importance-of-steemit-and-the-falling-price-of-bitcoin",
      "weight": 10000
    }
  ]
}

konchozzz upvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 09:52:03 UTC

19,427,872|82e619d

voter	konchozzz
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
weight	10000 (100.00%)
Transaction Info	Block #19427872/Trx 82e619d85019ee5f0399183700f1ad7c05430609

View Raw JSON Data

{
  "trx_id": "82e619d85019ee5f0399183700f1ad7c05430609",
  "block": 19427872,
  "trx_in_block": 35,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T09:52:03",
  "op": [
    "vote",
    {
      "voter": "konchozzz",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "weight": 10000
    }
  ]
}

calclavia followed @dtube

2018/01/30 00:33:36 UTC

19,416,718|09695b4

required auths	[]
required posting auths	["calclavia"]
id	follow
json	["follow",{"follower":"calclavia","following":"dtube","what":["blog"]}]
Transaction Info	Block #19416718/Trx 09695b4f9a2278d3742bbb821f0b05e63918cb88

View Raw JSON Data

{
  "trx_id": "09695b4f9a2278d3742bbb821f0b05e63918cb88",
  "block": 19416718,
  "trx_in_block": 13,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:33:36",
  "op": [
    "custom_json",
    {
      "required_auths": [],
      "required_posting_auths": [
        "calclavia"
      ],
      "id": "follow",
      "json": "[\"follow\",{\"follower\":\"calclavia\",\"following\":\"dtube\",\"what\":[\"blog\"]}]"
    }
  ]
}

calclavia updated their account properties

2018/01/30 00:33:30 UTC

19,416,716|ab1644a

account	calclavia
posting	{"weight_threshold":1,"account_auths":[["dtube.app",1]],"key_auths":[["STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",1]]}
memo key	STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3
json metadata	{"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"}}
Transaction Info	Block #19416716/Trx ab1644a1290abf4d20a0444c8136efae14bc7a9b

View Raw JSON Data

{
  "trx_id": "ab1644a1290abf4d20a0444c8136efae14bc7a9b",
  "block": 19416716,
  "trx_in_block": 9,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:33:30",
  "op": [
    "account_update",
    {
      "account": "calclavia",
      "posting": {
        "weight_threshold": 1,
        "account_auths": [
          [
            "dtube.app",
            1
          ]
        ],
        "key_auths": [
          [
            "STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
            1
          ]
        ]
      },
      "memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
      "json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}"
    }
  ]
}

calclavia upvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:31:12 UTC

19,416,670|a714d4a

voter	calclavia
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
weight	10000 (100.00%)
Transaction Info	Block #19416670/Trx a714d4ae44a327df60ad033553ec51cb31a17f4e

View Raw JSON Data

{
  "trx_id": "a714d4ae44a327df60ad033553ec51cb31a17f4e",
  "block": 19416670,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:31:12",
  "op": [
    "vote",
    {
      "voter": "calclavia",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "weight": 10000
    }
  ]
}

calclavia upvoted (100.00%) @midasexpo / my-best-deep-dream-creations-so-far

2018/01/30 00:30:42 UTC

19,416,660|5d643eb

voter	calclavia
author	midasexpo
permlink	my-best-deep-dream-creations-so-far
weight	10000 (100.00%)
Transaction Info	Block #19416660/Trx 5d643ebb6f5eb5f4d53fcb4d18a0ae82c84dd393

View Raw JSON Data

{
  "trx_id": "5d643ebb6f5eb5f4d53fcb4d18a0ae82c84dd393",
  "block": 19416660,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:30:42",
  "op": [
    "vote",
    {
      "voter": "calclavia",
      "author": "midasexpo",
      "permlink": "my-best-deep-dream-creations-so-far",
      "weight": 10000
    }
  ]
}

calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:25:18 UTC

19,416,552|615b591

parent author
parent permlink	a3c
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
title	Reinforcement Learning using Asynchronous Advantage Actor Critic
body	Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/). # Actor Critic Models Before we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards. <center> ![Reinforcement Learning Diagram](https://cdn-images-1.medium.com/max/800/0Rda_s6qNiZhxmQEJ.png) </center> The AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf). ## Objective Functions Looking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a \| s)_ as the probability of the sampled action _a_ given state _s_. We arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_). > Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error) > Policy Loss: _L = - log(π(a \| s)) * A(s)_ But not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation. > _H(π) = - Σ P(x) log(P(x))_ Entropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima. > Policy Loss: _L = - log(π(a \| s)) * A(s) - βH(π)_ When we combine the two loss functions, we get the loss function for the model overall: > _L = 0.5 Σ(R - V(s))² - log(π(a \| s)) * A(s) - βH(π)_ Notice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent! # Asynchronous The interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop. https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png A3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update. However, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously. That’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl). I’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!
json metadata	{"tags":["ai","deep","reinforcement","learning","a3c"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #19416552/Trx 615b591bc2de22b1042fcc88c470dab5b9a14e65

View Raw JSON Data

{
  "trx_id": "615b591bc2de22b1042fcc88c470dab5b9a14e65",
  "block": 19416552,
  "trx_in_block": 25,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:25:18",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "a3c",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
      "body": "Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/).\n\n# Actor Critic Models\nBefore we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards.\n\n<center>\n![Reinforcement Learning Diagram](https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png)\n</center>\n\nThe AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf).\n\n## Objective Functions\nLooking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action a given state _s_.\n\nWe arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_).\n\n> Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error)\n> Policy Loss: _L = -log(π(a | s)) * A(s)_\n\nBut not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation.\n\n> _H(π) = - Σ(P(x) log(P(x))_\n\nEntropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima\n\n> Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_\n\nWhen we combine the two loss functions, we get the loss function for the model overall:\n\n> _L = 0.5 * Σ(R — V(s))² - log(π(a | s)) * A(s) - β*H(π)_\n\nNotice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent!\n\n# Asynchronous\nThe interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop.\n\nhttps://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\n\nA3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update.\n\nHowever, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously.\n\nThat’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl).\n\nI’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!",
      "json_metadata": "{\"tags\":[\"ai\",\"deep\",\"reinforcement\",\"learning\",\"a3c\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}

calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:24:57 UTC

19,416,545|d345fb5

parent author
parent permlink	a3c
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
title	Reinforcement Learning using Asynchronous Advantage Actor Critic
body	Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/). # Actor Critic Models Before we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards. <center> ![Reinforcement Learning Diagram](https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png) </center> The AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf). ## Objective Functions Looking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action a given state _s_. We arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_). > Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error) > Policy Loss: _L = -log(π(a | s)) * A(s)_ But not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation. > _H(π) = - Σ(P(x) log(P(x))_ Entropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima > Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_ When we combine the two loss functions, we get the loss function for the model overall: > _L = 0.5 * Σ(R — V(s))² - log(π(a | s)) * A(s) - β*H(π)_ Notice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent! # Asynchronous The interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop. https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png A3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update. However, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously. That’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl). I’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!
json metadata	{"tags":["ai","deep","reinforcement","learning","a3c"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #19416545/Trx d345fb5b8b99202bce2de759a5ba565831415e67

View Raw JSON Data

{
  "trx_id": "d345fb5b8b99202bce2de759a5ba565831415e67",
  "block": 19416545,
  "trx_in_block": 9,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:24:57",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "a3c",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
      "body": "Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective reinforcement learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/).\n\n# Actor Critic Models\nBefore we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards.\n\n<center>\n![Reinforcement Learning Diagram](https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png)\n</center>\n\nThe AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf).\n\n## Objective Functions\nLooking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action a given state _s_.\n\nWe arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_).\n\n> Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error)\n> Policy Loss: _L = -log(π(a | s)) * A(s)_\n\nBut not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation.\n\n> _H(π) = - Σ(P(x) log(P(x))_\n\nEntropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima\n\n> Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_\n\nWhen we combine the two loss functions, we get the loss function for the model overall:\n\n> _L = 0.5 * Σ(R — V(s))² - log(π(a | s)) * A(s) - β*H(π)_\n\nNotice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent!\n\n# Asynchronous\nThe interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop.\n\nhttps://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\n\nA3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update.\n\nHowever, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously.\n\nThat’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl).\n\nI’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!",
      "json_metadata": "{\"tags\":[\"ai\",\"deep\",\"reinforcement\",\"learning\",\"a3c\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}

traplord upvoted (100.00%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:24:33 UTC

19,416,537|81c2b48

voter	traplord
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
weight	10000 (100.00%)
Transaction Info	Block #19416537/Trx 81c2b4875170e31c553062ea7be806ec707b10ab

View Raw JSON Data

{
  "trx_id": "81c2b4875170e31c553062ea7be806ec707b10ab",
  "block": 19416537,
  "trx_in_block": 17,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:24:33",
  "op": [
    "vote",
    {
      "voter": "traplord",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "weight": 10000
    }
  ]
}

calclavia upvoted (100.00%) @traplord / introductory-post

2018/01/30 00:24:12 UTC

19,416,530|fca0fcb

voter	calclavia
author	traplord
permlink	introductory-post
weight	10000 (100.00%)
Transaction Info	Block #19416530/Trx fca0fcb50c522df7c410a3f310c5e9e1371f1ea6

View Raw JSON Data

{
  "trx_id": "fca0fcb50c522df7c410a3f310c5e9e1371f1ea6",
  "block": 19416530,
  "trx_in_block": 33,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:24:12",
  "op": [
    "vote",
    {
      "voter": "calclavia",
      "author": "traplord",
      "permlink": "introductory-post",
      "weight": 10000
    }
  ]
}

calclavia updated their account properties

2018/01/30 00:23:39 UTC

19,416,519|265054c

account	calclavia
memo key	STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3
json metadata	{"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"}}
Transaction Info	Block #19416519/Trx 265054c97caa088cf91b5c9eb357a1d1bf6eabf4

View Raw JSON Data

{
  "trx_id": "265054c97caa088cf91b5c9eb357a1d1bf6eabf4",
  "block": 19416519,
  "trx_in_block": 51,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:23:39",
  "op": [
    "account_update",
    {
      "account": "calclavia",
      "memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
      "json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"Henry\",\"about\":\"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}"
    }
  ]
}

calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:21:12 UTC

19,416,470|33db005

parent author
parent permlink	a3c
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
title	Reinforcement Learning using Asynchronous Advantage Actor Critic
body	@@ -151,16 +151,30 @@ fective +reinforcement learning
json metadata	{"tags":["a3c","deep","reinforcement","learning","algorithm"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #19416470/Trx 33db005546e4d17af9b2eabc1f3b8dc9e4beb4c5

View Raw JSON Data

{
  "trx_id": "33db005546e4d17af9b2eabc1f3b8dc9e4beb4c5",
  "block": 19416470,
  "trx_in_block": 19,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:21:12",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "a3c",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
      "body": "@@ -151,16 +151,30 @@\n fective \n+reinforcement \n learning\n",
      "json_metadata": "{\"tags\":[\"a3c\",\"deep\",\"reinforcement\",\"learning\",\"algorithm\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}

calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:20:21 UTC

19,416,453|2939757

parent author
parent permlink	a3c
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
title	Reinforcement Learning using Asynchronous Advantage Actor Critic
body	@@ -4195,16 +4195,86 @@ aptop.%0A%0A +https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png%0A%0A A3C work
json metadata	{"tags":["a3c","deep","reinforcement","learning","algorithm"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png","https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #19416453/Trx 2939757efc851e5b834c8480869c2a0b9e6d2591

View Raw JSON Data

{
  "trx_id": "2939757efc851e5b834c8480869c2a0b9e6d2591",
  "block": 19416453,
  "trx_in_block": 4,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:20:21",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "a3c",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
      "body": "@@ -4195,16 +4195,86 @@\n aptop.%0A%0A\n+https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png%0A%0A\n A3C work\n",
      "json_metadata": "{\"tags\":[\"a3c\",\"deep\",\"reinforcement\",\"learning\",\"algorithm\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\",\"https://cdn-images-1.medium.com/max/800/1*YtnGhtSAMnnHSL8PvS7t_w.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}

cheetah replied to @calclavia / cheetah-re-calclaviareinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:19:33 UTC

19,416,437|221fb27

parent author	calclavia
parent permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
author	cheetah
permlink	cheetah-re-calclaviareinforcement-learning-using-asynchronous-advantage-actor-critic
title
body	Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://medium.com/@henrymao/reinforcement-learning-using-asynchronous-advantage-actor-critic-704147f91686
json metadata
Transaction Info	Block #19416437/Trx 221fb273c9576dfe229528bb71ac69f81d9937b8

View Raw JSON Data

{
  "trx_id": "221fb273c9576dfe229528bb71ac69f81d9937b8",
  "block": 19416437,
  "trx_in_block": 40,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:19:33",
  "op": [
    "comment",
    {
      "parent_author": "calclavia",
      "parent_permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "author": "cheetah",
      "permlink": "cheetah-re-calclaviareinforcement-learning-using-asynchronous-advantage-actor-critic",
      "title": "",
      "body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://medium.com/@henrymao/reinforcement-learning-using-asynchronous-advantage-actor-critic-704147f91686",
      "json_metadata": ""
    }
  ]
}

cheetah upvoted (0.08%) @calclavia / reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:19:30 UTC

19,416,436|815cd54

voter	cheetah
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
weight	8 (0.08%)
Transaction Info	Block #19416436/Trx 815cd54349bd4651bb4c949b87a5b9ab6f0465e8

View Raw JSON Data

{
  "trx_id": "815cd54349bd4651bb4c949b87a5b9ab6f0465e8",
  "block": 19416436,
  "trx_in_block": 9,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:19:30",
  "op": [
    "vote",
    {
      "voter": "cheetah",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "weight": 8
    }
  ]
}

calclavia published a new post: reinforcement-learning-using-asynchronous-advantage-actor-critic

2018/01/30 00:19:15 UTC

19,416,431|ce53042

parent author
parent permlink	a3c
author	calclavia
permlink	reinforcement-learning-using-asynchronous-advantage-actor-critic
title	Reinforcement Learning using Asynchronous Advantage Actor Critic
body	Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/). # Actor Critic Models Before we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards. <center> ![Reinforcement Learning Diagram](https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png) </center> The AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf). ## Objective Functions Looking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a \| s)_ as the probability of the sampled action a given state _s_. We arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_). > Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error) > Policy Loss: _L = -log(π(a \| s)) * A(s)_ But not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation. > _H(π) = - Σ(P(x) log(P(x))_ Entropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima > Policy Loss: _L = - log(π(a \| s)) * A(s) - β*H(π)_ When we combine the two loss functions, we get the loss function for the model overall: > _L = 0.5 * Σ(R — V(s))² - log(π(a \| s)) * A(s) - β*H(π)_ Notice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent! # Asynchronous The interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop. A3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update. However, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously. That’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl). I’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!
json metadata	{"tags":["a3c","deep","reinforcement","learning","algorithm"],"image":["https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png"],"links":["https://github.com/calclavia/rl","https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l","https://gym.openai.com/","https://arxiv.org/pdf/1506.02438.pdf","https://www.youtube.com/watch?v=KHZVXao4qXs","https://blog.openai.com/baselines-acktr-a2c/"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #19416431/Trx ce5304208a8cfb6b51963917056ab75856c9df40

View Raw JSON Data

{
  "trx_id": "ce5304208a8cfb6b51963917056ab75856c9df40",
  "block": 19416431,
  "trx_in_block": 51,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:19:15",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "a3c",
      "author": "calclavia",
      "permlink": "reinforcement-learning-using-asynchronous-advantage-actor-critic",
      "title": "Reinforcement Learning using Asynchronous Advantage Actor Critic",
      "body": "Reinforcement learning is an extremely exciting field that has pushed the boundaries of artificial intelligence. In my research, I stumbled upon an effective learning method called Asynchronous Advantage Actor Critic (A3C) published by DeepMind. This algorithm beats the famous DQN by quite a margin and also seems to yield more stable results. I wanted to give a high level explanation in this post of how the algorithm works, hopefully inspiring more people to apply it in their projects. If you’re interested in the code, I implemented the [algorithm](https://github.com/calclavia/rl) using Tensorflow and Keras inspired by this [Medium article](https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l). The library is compatible with [OpenAI’s Gym API](https://gym.openai.com/).\n\n# Actor Critic Models\nBefore we dive into the asynchronous part, I’d like to explain Actor-Critic (AC) learning models. In a reinforcement learning problem, an agent exists in some state _s_ and tries to choose an action _a_ to maximize its discounted future rewards.\n\n<center>\n![Reinforcement Learning Diagram](https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png)\n</center>\n\nThe AC agent is comprised of an actor and a critic. The actor attempts to learn a policy _π(s)_ (AKA the rule that the agent follows) by receiving feedback from a critic. The critic learns a value function _V(s)_ (the expected return in rewards), which is used to determine how advantageous it is to be in a particular state. The advantage is defined as _A(s) = Q(s, a) - V(s)_. In practice, we don’t want to compute _Q(s, a)_. Instead, we formulate an estimate of the advantage function as _A(s) = r + γV(s’) - V(s)_, where _r_ is the current reward and _γ_ is the discount factor. This achieves the same result without needing to learn the _Q_ function. 
An even more effective method would be to use [generalized advantage estimation](https://arxiv.org/pdf/1506.02438.pdf).\n\n## Objective Functions\nLooking at the actor-critic agent from a neural network perspective, we would give the agent two outputs: value and policy. The value output predicts a scalar that learns the value function _V(s)_. The policy output _π(s)_ (softmax activation) is a vector that represents a probability distribution over the actions. We pick the action non-deterministically by sampling from this probability distribution. We denote _π(a | s)_ as the probability of the sampled action a given state _s_.\n\nWe arrive at the following loss functions (we want to minimize these). _R_ represents the discounted future reward (_R = r + γV(s’)_).\n\n> Value Loss: _L = Σ(R - V(s))²_ (Sum Squared Error)\n> Policy Loss: _L = -log(π(a | s)) * A(s)_\n\nBut not so fast! While the loss functions above would work, it is better to introduce the entropy _H(π)_ to the equation.\n\n> _H(π) = - Σ(P(x) log(P(x))_\n\nEntropy is a measure of how spread out the probabilities are. The higher the entropy, the more similar each action’s probability will be, which makes the agent more uncertain about which action to choose. Entropy can be added to the loss function to encourage exploration by preventing the agent from being too decisive and converging at local optima\n\n> Policy Loss: _L = - log(π(a | s)) * A(s) - β*H(π)_\n\nWhen we combine the two loss functions, we get the loss function for the model overall:\n\n> _L = 0.5 * Σ(R — V(s))² - log(π(a | s)) * A(s) - β*H(π)_\n\nNotice that the loss for value is set to 50% to make policy learning faster than value learning. For more information on the derivations of these loss functions, I recommend watching [David Silver’s RL lecture videos](https://www.youtube.com/watch?v=KHZVXao4qXs). With that, we can train our AC agent!\n\n# Asynchronous\nThe interesting part about A3C is the first A — asynchronous. 
DeepMind’s paper showed that by introducing asynchronous training, we can reduce the correlation between episodes, improving various methods of learning including Q-learning (better data efficiency). It is also a more efficient use of multi-core CPUs, allowing us to train agents to do quite amazing things with just a laptop.\n\nA3C works by spawning minion AC agents, each performing actions in their own separate environments and updating the master neural network after a certain amount of actions have been taken. The individual agents sync their weights with the master network after every gradient update.\n\nHowever, [more recent research from OpenAI](https://blog.openai.com/baselines-acktr-a2c/) suggests that A2C (without asynchronous learning) performs equally well when using GPUs. We can argue that the key benefit of A3C is that there are parallel agents learning at the same time, allowing a policy to be evaluated on multiple trajectories simultaneously.\n\nThat’s it for a high level overview of A3C. If you’re interested in checking out a detailed implementation of the algorithm, be sure to check out my [Github repository](https://github.com/calclavia/rl).\n\nI’ll be following up on this post shortly on how I applied A3C to a mobile game I developed called _Relay_ . Feel free to leave me suggestions or ask questions in the comments section!",
      "json_metadata": "{\"tags\":[\"a3c\",\"deep\",\"reinforcement\",\"learning\",\"algorithm\"],\"image\":[\"https://cdn-images-1.medium.com/max/800/0*Rda_s6qNiZhxmQEJ.png\"],\"links\":[\"https://github.com/calclavia/rl\",\"https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-8-asynchronous-actor-critic-agents-a3c-c88f72a5e9f2#.dgiztjv7l\",\"https://gym.openai.com/\",\"https://arxiv.org/pdf/1506.02438.pdf\",\"https://www.youtube.com/watch?v=KHZVXao4qXs\",\"https://blog.openai.com/baselines-acktr-a2c/\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}

traplord upvoted (100.00%) @calclavia / re-traplord-introductory-post-20180129t235143776z

2018/01/30 00:12:39 UTC

19,416,299|23223b4

voter	traplord
author	calclavia
permlink	re-traplord-introductory-post-20180129t235143776z
weight	10000 (100.00%)
Transaction Info	Block #19416299/Trx 23223b433aed9f9ddceb76b5838c2eec990eda82

View Raw JSON Data

{
  "trx_id": "23223b433aed9f9ddceb76b5838c2eec990eda82",
  "block": 19416299,
  "trx_in_block": 41,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-30T00:12:39",
  "op": [
    "vote",
    {
      "voter": "traplord",
      "author": "calclavia",
      "permlink": "re-traplord-introductory-post-20180129t235143776z",
      "weight": 10000
    }
  ]
}

calclavia updated their account properties

2018/01/29 23:55:24 UTC

19,415,954|83635a6

account	calclavia
memo key	STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3
json metadata	{"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"calclavia","about":"Entrepreneur, Software Engineer. Founder of Altum Inc and Calclavia. Pursuing machine learning and AI research.","location":"California","website":"https://calclavia.com"}}
Transaction Info	Block #19415954/Trx 83635a66a30eda7ffaef492bf3102e982ace75d8

View Raw JSON Data

{
  "trx_id": "83635a66a30eda7ffaef492bf3102e982ace75d8",
  "block": 19415954,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-29T23:55:24",
  "op": [
    "account_update",
    {
      "account": "calclavia",
      "memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
      "json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\",\"name\":\"calclavia\",\"about\":\"Entrepreneur, Software Engineer. Founder of Altum Inc and Calclavia. Pursuing machine learning and AI research.\",\"location\":\"California\",\"website\":\"https://calclavia.com\"}}"
    }
  ]
}

calclavia updated their account properties

2018/01/29 23:53:30 UTC

19,415,916|284f06e

account	calclavia
memo key	STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3
json metadata	{"profile":{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX."}}
Transaction Info	Block #19415916/Trx 284f06e86f86e1d955f022239276b103d2278886

View Raw JSON Data

{
  "trx_id": "284f06e86f86e1d955f022239276b103d2278886",
  "block": 19415916,
  "trx_in_block": 24,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-29T23:53:30",
  "op": [
    "account_update",
    {
      "account": "calclavia",
      "memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
      "json_metadata": "{\"profile\":{\"profile_image\":\"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.\"}}"
    }
  ]
}

calclavia replied to @traplord / re-traplord-introductory-post-20180129t235143776z

2018/01/29 23:51:45 UTC

19,415,881|b1a258e

parent author	traplord
parent permlink	introductory-post
author	calclavia
permlink	re-traplord-introductory-post-20180129t235143776z
title
body	Glad you made it on the platform!
json metadata	{"tags":["introduceyourself"],"app":"steemit/0.1"}
Transaction Info	Block #19415881/Trx b1a258e63041261f707a2236d4ef3249c42ac677

View Raw JSON Data

{
  "trx_id": "b1a258e63041261f707a2236d4ef3249c42ac677",
  "block": 19415881,
  "trx_in_block": 56,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-29T23:51:45",
  "op": [
    "comment",
    {
      "parent_author": "traplord",
      "parent_permlink": "introductory-post",
      "author": "calclavia",
      "permlink": "re-traplord-introductory-post-20180129t235143776z",
      "title": "",
      "body": "Glad you made it on the platform!",
      "json_metadata": "{\"tags\":[\"introduceyourself\"],\"app\":\"steemit/0.1\"}"
    }
  ]
}

calclavia followed @traplord

2018/01/29 23:51:18 UTC

19,415,872|04a455b

required auths	[]
required posting auths	["calclavia"]
id	follow
json	["follow",{"follower":"calclavia","following":"traplord","what":["blog"]}]
Transaction Info	Block #19415872/Trx 04a455b720a3263536fe2a182f8a537e4212e468

View Raw JSON Data

{
  "trx_id": "04a455b720a3263536fe2a182f8a537e4212e468",
  "block": 19415872,
  "trx_in_block": 12,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-29T23:51:18",
  "op": [
    "custom_json",
    {
      "required_auths": [],
      "required_posting_auths": [
        "calclavia"
      ],
      "id": "follow",
      "json": "[\"follow\",{\"follower\":\"calclavia\",\"following\":\"traplord\",\"what\":[\"blog\"]}]"
    }
  ]
}

steem created a new account: @calclavia

2018/01/29 23:50:21 UTC

19,415,854|a7feebf

fee	0.500 STEEM
delegation	29700.000000 VESTS
creator	steem
new account name	calclavia
owner	{"weight_threshold":1,"account_auths":[],"key_auths":[["STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",1]]}
active	{"weight_threshold":1,"account_auths":[],"key_auths":[["STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",1]]}
posting	{"weight_threshold":1,"account_auths":[],"key_auths":[["STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",1]]}
memo key	STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3
json metadata
extensions	[]
Transaction Info	Block #19415854/Trx a7feebfd8c0f65193e891451143f612ddeb7a39b

View Raw JSON Data

{
  "trx_id": "a7feebfd8c0f65193e891451143f612ddeb7a39b",
  "block": 19415854,
  "trx_in_block": 23,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-01-29T23:50:21",
  "op": [
    "account_create_with_delegation",
    {
      "fee": "0.500 STEEM",
      "delegation": "29700.000000 VESTS",
      "creator": "steem",
      "new_account_name": "calclavia",
      "owner": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",
            1
          ]
        ]
      },
      "active": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",
            1
          ]
        ]
      },
      "posting": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
            1
          ]
        ]
      },
      "memo_key": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3",
      "json_metadata": "",
      "extensions": []
    }
  ]
}

Manabar

Voting Power 100.00%

Downvote Power 100.00%

Resource Credits 100.00%

Reputation Progress 0.00%

{
  "voting_manabar": {
    "current_mana": "8143659806",
    "last_update_time": 1779056778
  },
  "downvote_manabar": {
    "current_mana": 2035914951,
    "last_update_time": 1779056778
  },
  "rc_account": {
    "account": "calclavia",
    "rc_manabar": {
      "current_mana": "10164408779",
      "last_update_time": 1779056778
    },
    "max_rc_creation_adjustment": {
      "amount": "2020748973",
      "precision": 6,
      "nai": "@@000000037"
    },
    "max_rc": "10164408779"
  }
}

Account Metadata

POSTING JSON METADATA
profile	{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"}
JSON METADATA
profile	{"profile_image":"https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.","name":"Henry","about":"Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.","location":"California","website":"https://calclavia.com"}

{
  "posting_json_metadata": {
    "profile": {
      "profile_image": "https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.",
      "name": "Henry",
      "about": "Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.",
      "location": "California",
      "website": "https://calclavia.com"
    }
  },
  "json_metadata": {
    "profile": {
      "profile_image": "https://cdn-images-1.medium.com/fit/c/100/100/0*m39fO6oY3Kqz66yX.",
      "name": "Henry",
      "about": "Entrepreneur, Artificial Intelligence Researcher. Founder of Altum Inc and Calclavia.",
      "location": "California",
      "website": "https://calclavia.com"
    }
  }
}

Auth Keys

Owner

Single Signature

Public Keys

STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX 1/1

Active

Single Signature

Public Keys

STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K 1/1

Posting

Single Signature

Public Keys

STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf 1/1

App Permissions

@dtube.app 1/1

Memo

STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3

{
  "owner": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6Hb8SSQNGQDTdQBryix1PsqGGwwz7c8uD1bx1JA2Xt34qD6BkX",
        1
      ]
    ]
  },
  "active": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6xG22tmKi3TeEc8djYr8zTCjfaP62iMzNkdJU4ke9sMFKu2A7K",
        1
      ]
    ]
  },
  "posting": {
    "weight_threshold": 1,
    "account_auths": [
      [
        "dtube.app",
        1
      ]
    ],
    "key_auths": [
      [
        "STM64Bx4AWNRF96QAx1FHDzYHn2o7MpfcExDyRrW3dwYAux77gLmf",
        1
      ]
    ]
  },
  "memo": "STM6g9QdH4CCVs1AReQLZZSraFbajet76f9TEPGY6Ho4RZ52CZYP3"
}

Witness Votes

0 / 30

No active witness votes.

[]